{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:16:01.984520', 'step': 0, 'epoch': 0} {'type': 'pplx', 'content': 54140675.446864516, 'timestamp': '2025-09-10 02:16:01.988837', 'step': 0, 'epoch': 0} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:02.062821', 'step': 0, 'epoch': 1} {'type': 'loss', 'content': 0.6008338332176208, 'timestamp': '2025-09-10 02:16:02.064796', 'step': 1, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:02.110853', 'step': 1, 'epoch': 1} {'type': 'loss', 'content': 0.5395371317863464, 'timestamp': '2025-09-10 02:16:02.115148', 'step': 2, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:02.146065', 'step': 2, 'epoch': 1} {'type': 'loss', 'content': 0.547315239906311, 'timestamp': '2025-09-10 02:16:02.152973', 'step': 3, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:02.197495', 'step': 3, 'epoch': 1} {'type': 'loss', 'content': 0.6588919758796692, 'timestamp': '2025-09-10 02:16:02.248579', 'step': 4, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:02.280581', 'step': 4, 'epoch': 1} {'type': 'loss', 'content': 0.2344198077917099, 'timestamp': '2025-09-10 02:16:02.284584', 'step': 5, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:02.335992', 'step': 5, 'epoch': 1} {'type': 'loss', 'content': 0.18304279446601868, 'timestamp': '2025-09-10 02:16:02.338088', 'step': 6, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:02.368461', 'step': 6, 'epoch': 1} {'type': 'loss', 'content': 0.1824495494365692, 'timestamp': '2025-09-10 02:16:02.375219', 'step': 7, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:02.418461', 'step': 7, 'epoch': 1} {'type': 'loss', 'content': 0.21861636638641357, 'timestamp': '2025-09-10 02:16:02.443617', 'step': 8, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:02.500477', 'step': 8, 'epoch': 1} {'type': 'loss', 'content': 0.0901818498969078, 'timestamp': '2025-09-10 02:16:02.503792', 'step': 9, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:02.534851', 'step': 9, 'epoch': 1} {'type': 'loss', 'content': 0.06925918161869049, 'timestamp': '2025-09-10 02:16:02.542382', 'step': 10, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:02.581087', 'step': 10, 'epoch': 1} {'type': 'loss', 'content': 0.06179536134004593, 'timestamp': '2025-09-10 02:16:02.587234', 'step': 11, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:02.625224', 'step': 11, 'epoch': 1} {'type': 'loss', 'content': 0.060144226998090744, 'timestamp': '2025-09-10 02:16:02.652677', 'step': 12, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:02.700676', 'step': 12, 'epoch': 1} {'type': 'loss', 'content': 0.04763566702604294, 'timestamp': '2025-09-10 02:16:02.704489', 'step': 13, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:02.757375', 'step': 13, 'epoch': 1} {'type': 'loss', 'content': 0.04455741122364998, 'timestamp': '2025-09-10 02:16:02.765877', 'step': 14, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:02.807895', 'step': 14, 'epoch': 1} {'type': 'loss', 'content': 0.03932064399123192, 'timestamp': '2025-09-10 02:16:02.812005', 'step': 15, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:02.847941', 'step': 15, 'epoch': 1} {'type': 'loss', 'content': 0.02747354283928871, 'timestamp': '2025-09-10 02:16:02.874022', 'step': 16, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:02.926383', 'step': 16, 'epoch': 1} {'type': 'loss', 'content': 0.02676134742796421, 'timestamp': '2025-09-10 02:16:02.930699', 'step': 17, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:02.967263', 'step': 17, 'epoch': 1} {'type': 'loss', 'content': 0.036269430071115494, 'timestamp': '2025-09-10 02:16:02.971943', 'step': 18, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:03.013820', 'step': 18, 'epoch': 1} {'type': 'loss', 'content': 0.025049904361367226, 'timestamp': '2025-09-10 02:16:03.023342', 'step': 19, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:03.064093', 'step': 19, 'epoch': 1} {'type': 'loss', 'content': 0.038132019340991974, 'timestamp': '2025-09-10 02:16:03.092030', 'step': 20, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:03.131090', 'step': 20, 'epoch': 1} {'type': 'loss', 'content': 0.02351507358253002, 'timestamp': '2025-09-10 02:16:03.138086', 'step': 21, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:03.174188', 'step': 21, 'epoch': 1} {'type': 'loss', 'content': 0.016791896894574165, 'timestamp': '2025-09-10 02:16:03.177900', 'step': 22, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:03.210824', 'step': 22, 'epoch': 1} {'type': 'loss', 'content': 0.028946993872523308, 'timestamp': '2025-09-10 02:16:03.217309', 'step': 23, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:03.248257', 'step': 23, 'epoch': 1} {'type': 'loss', 'content': 0.030578048899769783, 'timestamp': '2025-09-10 02:16:03.276442', 'step': 24, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:03.307551', 'step': 24, 'epoch': 1} {'type': 'loss', 'content': 0.0187423974275589, 'timestamp': '2025-09-10 02:16:03.311793', 'step': 25, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:03.343360', 'step': 25, 'epoch': 1} {'type': 'loss', 'content': 0.01975913718342781, 'timestamp': '2025-09-10 02:16:03.350089', 'step': 26, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:03.380780', 'step': 26, 'epoch': 1} {'type': 'loss', 'content': 0.0228941161185503, 'timestamp': '2025-09-10 02:16:03.387364', 'step': 27, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:16:03.436367', 'step': 27, 'epoch': 1} {'type': 'loss', 'content': 0.02786724641919136, 'timestamp': '2025-09-10 02:16:03.460600', 'step': 28, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:03.493390', 'step': 28, 'epoch': 1} {'type': 'loss', 'content': 0.045785628259181976, 'timestamp': '2025-09-10 02:16:03.498028', 'step': 29, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:16:03.533826', 'step': 29, 'epoch': 1} {'type': 'loss', 'content': 0.004484932404011488, 'timestamp': '2025-09-10 02:16:03.546978', 'step': 30, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:03.578360', 'step': 30, 'epoch': 1} {'type': 'loss', 'content': 0.05362967774271965, 'timestamp': '2025-09-10 02:16:03.584874', 'step': 31, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:03.616459', 'step': 31, 'epoch': 1} {'type': 'loss', 'content': 0.06305649876594543, 'timestamp': '2025-09-10 02:16:03.644134', 'step': 32, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:03.677074', 'step': 32, 'epoch': 1} {'type': 'loss', 'content': 0.015654366463422775, 'timestamp': '2025-09-10 02:16:03.679219', 'step': 33, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:03.710464', 'step': 33, 'epoch': 1} {'type': 'loss', 'content': 0.02429381012916565, 'timestamp': '2025-09-10 02:16:03.717986', 'step': 34, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:03.749170', 'step': 34, 'epoch': 1} {'type': 'loss', 'content': 0.04219824820756912, 'timestamp': '2025-09-10 02:16:03.758869', 'step': 35, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:16:03.789600', 'step': 35, 'epoch': 1} {'type': 'loss', 'content': 0.05330771207809448, 'timestamp': '2025-09-10 02:16:03.813089', 'step': 36, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:03.843661', 'step': 36, 'epoch': 1} {'type': 'loss', 'content': 0.013570256531238556, 'timestamp': '2025-09-10 02:16:03.848084', 'step': 37, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:16:03.893622', 'step': 37, 'epoch': 1} {'type': 'loss', 'content': 0.01802876405417919, 'timestamp': '2025-09-10 02:16:03.895915', 'step': 38, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:03.928023', 'step': 38, 'epoch': 1} {'type': 'loss', 'content': 0.02937530353665352, 'timestamp': '2025-09-10 02:16:03.932421', 'step': 39, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:03.962845', 'step': 39, 'epoch': 1} {'type': 'loss', 'content': 0.03161190077662468, 'timestamp': '2025-09-10 02:16:03.990473', 'step': 40, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:04.022146', 'step': 40, 'epoch': 1} {'type': 'loss', 'content': 0.03022809512913227, 'timestamp': '2025-09-10 02:16:04.026833', 'step': 41, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:04.058668', 'step': 41, 'epoch': 1} {'type': 'loss', 'content': 0.027601536363363266, 'timestamp': '2025-09-10 02:16:04.062673', 'step': 42, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:04.097743', 'step': 42, 'epoch': 1} {'type': 'loss', 'content': 0.021324804052710533, 'timestamp': '2025-09-10 02:16:04.111039', 'step': 43, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:04.143441', 'step': 43, 'epoch': 1} {'type': 'loss', 'content': 0.020267976447939873, 'timestamp': '2025-09-10 02:16:04.171221', 'step': 44, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:04.202635', 'step': 44, 'epoch': 1} {'type': 'loss', 'content': 0.02561989612877369, 'timestamp': '2025-09-10 02:16:04.204754', 'step': 45, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:04.235907', 'step': 45, 'epoch': 1} {'type': 'loss', 'content': 0.025528931990265846, 'timestamp': '2025-09-10 02:16:04.242862', 'step': 46, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:04.273959', 'step': 46, 'epoch': 1} {'type': 'loss', 'content': 0.021888835355639458, 'timestamp': '2025-09-10 02:16:04.283691', 'step': 47, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:04.315341', 'step': 47, 'epoch': 1} {'type': 'loss', 'content': 0.027732163667678833, 'timestamp': '2025-09-10 02:16:04.342901', 'step': 48, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:04.379523', 'step': 48, 'epoch': 1} {'type': 'loss', 'content': 0.03233006224036217, 'timestamp': '2025-09-10 02:16:04.383378', 'step': 49, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:04.414652', 'step': 49, 'epoch': 1} {'type': 'loss', 'content': 0.021881645545363426, 'timestamp': '2025-09-10 02:16:04.425174', 'step': 50, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:16:04.455872', 'step': 50, 'epoch': 1} {'type': 'loss', 'content': 0.03416941687464714, 'timestamp': '2025-09-10 02:16:04.458081', 'step': 51, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 928], 'flops': 27527278844800}, 'timestamp': '2025-09-10 02:16:04.632897', 'step': 51, 'epoch': 1} {'type': 'loss', 'content': 0.027697524055838585, 'timestamp': '2025-09-10 02:16:04.656929', 'step': 52, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:04.694123', 'step': 52, 'epoch': 1} {'type': 'loss', 'content': 0.032309334725141525, 'timestamp': '2025-09-10 02:16:04.696565', 'step': 53, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:04.732462', 'step': 53, 'epoch': 1} {'type': 'loss', 'content': 0.014240605756640434, 'timestamp': '2025-09-10 02:16:04.739274', 'step': 54, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:04.773962', 'step': 54, 'epoch': 1} {'type': 'loss', 'content': 0.03165564686059952, 'timestamp': '2025-09-10 02:16:04.779367', 'step': 55, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:16:04.829001', 'step': 55, 'epoch': 1} {'type': 'loss', 'content': 0.028052538633346558, 'timestamp': '2025-09-10 02:16:04.861010', 'step': 56, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:16:04.896301', 'step': 56, 'epoch': 1} {'type': 'loss', 'content': 0.02410052716732025, 'timestamp': '2025-09-10 02:16:04.898695', 'step': 57, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:04.932687', 'step': 57, 'epoch': 1} {'type': 'loss', 'content': 0.028389716520905495, 'timestamp': '2025-09-10 02:16:04.938045', 'step': 58, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:04.970808', 'step': 58, 'epoch': 1} {'type': 'loss', 'content': 0.02223231829702854, 'timestamp': '2025-09-10 02:16:04.979603', 'step': 59, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:05.012882', 'step': 59, 'epoch': 1} {'type': 'loss', 'content': 0.025019675493240356, 'timestamp': '2025-09-10 02:16:05.040525', 'step': 60, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:05.075959', 'step': 60, 'epoch': 1} {'type': 'loss', 'content': 0.02373148687183857, 'timestamp': '2025-09-10 02:16:05.083328', 'step': 61, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:05.118418', 'step': 61, 'epoch': 1} {'type': 'loss', 'content': 0.0228324793279171, 'timestamp': '2025-09-10 02:16:05.123087', 'step': 62, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:05.159154', 'step': 62, 'epoch': 1} {'type': 'loss', 'content': 0.02589366026222706, 'timestamp': '2025-09-10 02:16:05.165199', 'step': 63, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:05.196884', 'step': 63, 'epoch': 1} {'type': 'loss', 'content': 0.02020171843469143, 'timestamp': '2025-09-10 02:16:05.224420', 'step': 64, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:16:05.262460', 'step': 64, 'epoch': 1} {'type': 'loss', 'content': 0.019750652834773064, 'timestamp': '2025-09-10 02:16:05.277816', 'step': 65, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:05.309484', 'step': 65, 'epoch': 1} {'type': 'loss', 'content': 0.022475482895970345, 'timestamp': '2025-09-10 02:16:05.316076', 'step': 66, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:05.349346', 'step': 66, 'epoch': 1} {'type': 'loss', 'content': 0.031203726306557655, 'timestamp': '2025-09-10 02:16:05.351899', 'step': 67, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:05.383549', 'step': 67, 'epoch': 1} {'type': 'loss', 'content': 0.026068750768899918, 'timestamp': '2025-09-10 02:16:05.408325', 'step': 68, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:05.441620', 'step': 68, 'epoch': 1} {'type': 'loss', 'content': 0.020249370485544205, 'timestamp': '2025-09-10 02:16:05.445702', 'step': 69, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:16:05.485622', 'step': 69, 'epoch': 1} {'type': 'loss', 'content': 0.02251577563583851, 'timestamp': '2025-09-10 02:16:05.501212', 'step': 70, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:05.533112', 'step': 70, 'epoch': 1} {'type': 'loss', 'content': 0.02423388697206974, 'timestamp': '2025-09-10 02:16:05.540249', 'step': 71, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:05.580456', 'step': 71, 'epoch': 1} {'type': 'loss', 'content': 0.016923097893595695, 'timestamp': '2025-09-10 02:16:05.605965', 'step': 72, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:05.641491', 'step': 72, 'epoch': 1} {'type': 'loss', 'content': 0.029709434136748314, 'timestamp': '2025-09-10 02:16:05.644029', 'step': 73, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:05.677693', 'step': 73, 'epoch': 1} {'type': 'loss', 'content': 0.013269062153995037, 'timestamp': '2025-09-10 02:16:05.683412', 'step': 74, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:05.716854', 'step': 74, 'epoch': 1} {'type': 'loss', 'content': 0.03226935863494873, 'timestamp': '2025-09-10 02:16:05.725697', 'step': 75, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:16:05.765814', 'step': 75, 'epoch': 1} {'type': 'loss', 'content': 0.020103048533201218, 'timestamp': '2025-09-10 02:16:05.802277', 'step': 76, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:05.833640', 'step': 76, 'epoch': 1} {'type': 'loss', 'content': 0.018879475072026253, 'timestamp': '2025-09-10 02:16:05.837910', 'step': 77, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:05.869054', 'step': 77, 'epoch': 1} {'type': 'loss', 'content': 0.038980383425951004, 'timestamp': '2025-09-10 02:16:05.872959', 'step': 78, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:05.906349', 'step': 78, 'epoch': 1} {'type': 'loss', 'content': 0.029591679573059082, 'timestamp': '2025-09-10 02:16:05.919579', 'step': 79, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:16:05.958106', 'step': 79, 'epoch': 1} {'type': 'loss', 'content': 0.014890284277498722, 'timestamp': '2025-09-10 02:16:05.994447', 'step': 80, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:06.026426', 'step': 80, 'epoch': 1} {'type': 'loss', 'content': 0.009303289465606213, 'timestamp': '2025-09-10 02:16:06.028566', 'step': 81, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:06.059374', 'step': 81, 'epoch': 1} {'type': 'loss', 'content': 0.02382597140967846, 'timestamp': '2025-09-10 02:16:06.071305', 'step': 82, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:06.103494', 'step': 82, 'epoch': 1} {'type': 'loss', 'content': 0.009986629709601402, 'timestamp': '2025-09-10 02:16:06.113405', 'step': 83, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:06.146437', 'step': 83, 'epoch': 1} {'type': 'loss', 'content': 0.022146521136164665, 'timestamp': '2025-09-10 02:16:06.171621', 'step': 84, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:06.203414', 'step': 84, 'epoch': 1} {'type': 'loss', 'content': 0.03081861138343811, 'timestamp': '2025-09-10 02:16:06.207612', 'step': 85, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:16:06.241382', 'step': 85, 'epoch': 1} {'type': 'loss', 'content': 0.042918942868709564, 'timestamp': '2025-09-10 02:16:06.254938', 'step': 86, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:06.286234', 'step': 86, 'epoch': 1} {'type': 'loss', 'content': 0.01463954895734787, 'timestamp': '2025-09-10 02:16:06.290359', 'step': 87, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:06.321404', 'step': 87, 'epoch': 1} {'type': 'loss', 'content': 0.025244222953915596, 'timestamp': '2025-09-10 02:16:06.349501', 'step': 88, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 14712978242368}, 'timestamp': '2025-09-10 02:16:06.393581', 'step': 88, 'epoch': 1} {'type': 'loss', 'content': 0.018959475681185722, 'timestamp': '2025-09-10 02:16:06.410750', 'step': 89, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:06.442741', 'step': 89, 'epoch': 1} {'type': 'loss', 'content': 0.019290607422590256, 'timestamp': '2025-09-10 02:16:06.449555', 'step': 90, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:06.480195', 'step': 90, 'epoch': 1} {'type': 'loss', 'content': 0.037016745656728745, 'timestamp': '2025-09-10 02:16:06.486836', 'step': 91, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:06.520237', 'step': 91, 'epoch': 1} {'type': 'loss', 'content': 0.01576540246605873, 'timestamp': '2025-09-10 02:16:06.554391', 'step': 92, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:06.586174', 'step': 92, 'epoch': 1} {'type': 'loss', 'content': 0.029999637976288795, 'timestamp': '2025-09-10 02:16:06.588193', 'step': 93, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:06.618830', 'step': 93, 'epoch': 1} {'type': 'loss', 'content': 0.007616397459059954, 'timestamp': '2025-09-10 02:16:06.625809', 'step': 94, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:16:06.672877', 'step': 94, 'epoch': 1} {'type': 'loss', 'content': 0.041450273245573044, 'timestamp': '2025-09-10 02:16:06.687746', 'step': 95, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:06.719554', 'step': 95, 'epoch': 1} {'type': 'loss', 'content': 0.028501790016889572, 'timestamp': '2025-09-10 02:16:06.746921', 'step': 96, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:06.780163', 'step': 96, 'epoch': 1} {'type': 'loss', 'content': 0.031911808997392654, 'timestamp': '2025-09-10 02:16:06.785472', 'step': 97, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:06.816090', 'step': 97, 'epoch': 1} {'type': 'loss', 'content': 0.010332711972296238, 'timestamp': '2025-09-10 02:16:06.820432', 'step': 98, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:06.850962', 'step': 98, 'epoch': 1} {'type': 'loss', 'content': 0.014578322879970074, 'timestamp': '2025-09-10 02:16:06.855363', 'step': 99, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:06.886347', 'step': 99, 'epoch': 1} {'type': 'loss', 'content': 0.030410753563046455, 'timestamp': '2025-09-10 02:16:06.914188', 'step': 100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:16:06.945693', 'step': 100, 'epoch': 1} {'type': 'loss', 'content': 0.019498659297823906, 'timestamp': '2025-09-10 02:16:06.948062', 'step': 101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:06.981389', 'step': 101, 'epoch': 1} {'type': 'loss', 'content': 0.02664143405854702, 'timestamp': '2025-09-10 02:16:06.988136', 'step': 102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:07.018987', 'step': 102, 'epoch': 1} {'type': 'loss', 'content': 0.0366455540060997, 'timestamp': '2025-09-10 02:16:07.026481', 'step': 103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:07.057910', 'step': 103, 'epoch': 1} {'type': 'loss', 'content': 0.021227413788437843, 'timestamp': '2025-09-10 02:16:07.090878', 'step': 104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:07.121568', 'step': 104, 'epoch': 1} {'type': 'loss', 'content': 0.016079608350992203, 'timestamp': '2025-09-10 02:16:07.130132', 'step': 105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:07.161498', 'step': 105, 'epoch': 1} {'type': 'loss', 'content': 0.01837443746626377, 'timestamp': '2025-09-10 02:16:07.168957', 'step': 106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:07.199456', 'step': 106, 'epoch': 1} {'type': 'loss', 'content': 0.029402051120996475, 'timestamp': '2025-09-10 02:16:07.206169', 'step': 107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:07.237182', 'step': 107, 'epoch': 1} {'type': 'loss', 'content': 0.021664408966898918, 'timestamp': '2025-09-10 02:16:07.265754', 'step': 108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:07.295601', 'step': 108, 'epoch': 1} {'type': 'loss', 'content': 0.01921442337334156, 'timestamp': '2025-09-10 02:16:07.303172', 'step': 109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:07.333506', 'step': 109, 'epoch': 1} {'type': 'loss', 'content': 0.016907794401049614, 'timestamp': '2025-09-10 02:16:07.340549', 'step': 110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:07.372342', 'step': 110, 'epoch': 1} {'type': 'loss', 'content': 0.018889309838414192, 'timestamp': '2025-09-10 02:16:07.384880', 'step': 111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:07.416259', 'step': 111, 'epoch': 1} {'type': 'loss', 'content': 0.014976476319134235, 'timestamp': '2025-09-10 02:16:07.444498', 'step': 112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:07.476485', 'step': 112, 'epoch': 1} {'type': 'loss', 'content': 0.020062191411852837, 'timestamp': '2025-09-10 02:16:07.481612', 'step': 113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:07.511720', 'step': 113, 'epoch': 1} {'type': 'loss', 'content': 0.023766087368130684, 'timestamp': '2025-09-10 02:16:07.518727', 'step': 114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:07.549994', 'step': 114, 'epoch': 1} {'type': 'loss', 'content': 0.021067747846245766, 'timestamp': '2025-09-10 02:16:07.557452', 'step': 115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:07.589333', 'step': 115, 'epoch': 1} {'type': 'loss', 'content': 0.020303750410676003, 'timestamp': '2025-09-10 02:16:07.617297', 'step': 116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:07.649366', 'step': 116, 'epoch': 1} {'type': 'loss', 'content': 0.02873547188937664, 'timestamp': '2025-09-10 02:16:07.661942', 'step': 117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:07.692247', 'step': 117, 'epoch': 1} {'type': 'loss', 'content': 0.021697448566555977, 'timestamp': '2025-09-10 02:16:07.699045', 'step': 118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:07.730118', 'step': 118, 'epoch': 1} {'type': 'loss', 'content': 0.02336110547184944, 'timestamp': '2025-09-10 02:16:07.742698', 'step': 119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:07.777761', 'step': 119, 'epoch': 1} {'type': 'loss', 'content': 0.02601124718785286, 'timestamp': '2025-09-10 02:16:07.810365', 'step': 120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:07.841544', 'step': 120, 'epoch': 1} {'type': 'loss', 'content': 0.019523756578564644, 'timestamp': '2025-09-10 02:16:07.851022', 'step': 121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:07.881927', 'step': 121, 'epoch': 1} {'type': 'loss', 'content': 0.019188281148672104, 'timestamp': '2025-09-10 02:16:07.891951', 'step': 122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:07.922749', 'step': 122, 'epoch': 1} {'type': 'loss', 'content': 0.020111748948693275, 'timestamp': '2025-09-10 02:16:07.929577', 'step': 123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:07.961525', 'step': 123, 'epoch': 1} {'type': 'loss', 'content': 0.02041424997150898, 'timestamp': '2025-09-10 02:16:07.993208', 'step': 124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:16:08.023902', 'step': 124, 'epoch': 1} {'type': 'loss', 'content': 0.03184106573462486, 'timestamp': '2025-09-10 02:16:08.026634', 'step': 125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:08.056664', 'step': 125, 'epoch': 1} {'type': 'loss', 'content': 0.02083834446966648, 'timestamp': '2025-09-10 02:16:08.066958', 'step': 126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:16:08.102647', 'step': 126, 'epoch': 1} {'type': 'loss', 'content': 0.025340793654322624, 'timestamp': '2025-09-10 02:16:08.116615', 'step': 127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:16:08.148390', 'step': 127, 'epoch': 1} {'type': 'loss', 'content': 0.021747667342424393, 'timestamp': '2025-09-10 02:16:08.171962', 'step': 128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:08.203715', 'step': 128, 'epoch': 1} {'type': 'loss', 'content': 0.025707753375172615, 'timestamp': '2025-09-10 02:16:08.213867', 'step': 129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:16:08.245075', 'step': 129, 'epoch': 1} {'type': 'loss', 'content': 0.01971745304763317, 'timestamp': '2025-09-10 02:16:08.247284', 'step': 130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:08.279079', 'step': 130, 'epoch': 1} {'type': 'loss', 'content': 0.02148953266441822, 'timestamp': '2025-09-10 02:16:08.286537', 'step': 131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:08.317714', 'step': 131, 'epoch': 1} {'type': 'loss', 'content': 0.02072383277118206, 'timestamp': '2025-09-10 02:16:08.342913', 'step': 132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:08.374235', 'step': 132, 'epoch': 1} {'type': 'loss', 'content': 0.021854082122445107, 'timestamp': '2025-09-10 02:16:08.379220', 'step': 133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:08.411433', 'step': 133, 'epoch': 1} {'type': 'loss', 'content': 0.016318751499056816, 'timestamp': '2025-09-10 02:16:08.419281', 'step': 134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:08.449943', 'step': 134, 'epoch': 1} {'type': 'loss', 'content': 0.02498047612607479, 'timestamp': '2025-09-10 02:16:08.462062', 'step': 135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:08.493203', 'step': 135, 'epoch': 1} {'type': 'loss', 'content': 0.02256803587079048, 'timestamp': '2025-09-10 02:16:08.521197', 'step': 136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:08.551083', 'step': 136, 'epoch': 1} {'type': 'loss', 'content': 0.017909932881593704, 'timestamp': '2025-09-10 02:16:08.560671', 'step': 137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:08.591595', 'step': 137, 'epoch': 1} {'type': 'loss', 'content': 0.019151031970977783, 'timestamp': '2025-09-10 02:16:08.598213', 'step': 138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:08.627700', 'step': 138, 'epoch': 1} {'type': 'loss', 'content': 0.01293948758393526, 'timestamp': '2025-09-10 02:16:08.634698', 'step': 139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:08.664634', 'step': 139, 'epoch': 1} {'type': 'loss', 'content': 0.014859228394925594, 'timestamp': '2025-09-10 02:16:08.693246', 'step': 140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:16:08.725823', 'step': 140, 'epoch': 1} {'type': 'loss', 'content': 0.030040746554732323, 'timestamp': '2025-09-10 02:16:08.738785', 'step': 141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:08.769782', 'step': 141, 'epoch': 1} {'type': 'loss', 'content': 0.015265722759068012, 'timestamp': '2025-09-10 02:16:08.777294', 'step': 142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:08.807493', 'step': 142, 'epoch': 1} {'type': 'loss', 'content': 0.02200271561741829, 'timestamp': '2025-09-10 02:16:08.814902', 'step': 143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:08.845397', 'step': 143, 'epoch': 1} {'type': 'loss', 'content': 0.023353280499577522, 'timestamp': '2025-09-10 02:16:08.873964', 'step': 144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:08.904724', 'step': 144, 'epoch': 1} {'type': 'loss', 'content': 0.01525102648884058, 'timestamp': '2025-09-10 02:16:08.912397', 'step': 145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:08.942660', 'step': 145, 'epoch': 1} {'type': 'loss', 'content': 0.02498231828212738, 'timestamp': '2025-09-10 02:16:08.949473', 'step': 146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:08.979736', 'step': 146, 'epoch': 1} {'type': 'loss', 'content': 0.0227807704359293, 'timestamp': '2025-09-10 02:16:08.989868', 'step': 147, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:16:20.735353', 'step': 147, 'epoch': 1} {'type': 'pplx', 'content': 12191892.104022551, 'timestamp': '2025-09-10 02:16:20.742172', 'step': 147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:20.777302', 'step': 147, 'epoch': 1} {'type': 'loss', 'content': 0.01853315904736519, 'timestamp': '2025-09-10 02:16:20.804672', 'step': 148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:20.847582', 'step': 148, 'epoch': 1} {'type': 'loss', 'content': 0.01748022995889187, 'timestamp': '2025-09-10 02:16:20.852109', 'step': 149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:20.892051', 'step': 149, 'epoch': 1} {'type': 'loss', 'content': 0.016961688175797462, 'timestamp': '2025-09-10 02:16:20.898764', 'step': 150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:16:20.955270', 'step': 150, 'epoch': 1} {'type': 'loss', 'content': 0.03245147690176964, 'timestamp': '2025-09-10 02:16:20.968525', 'step': 151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:21.030461', 'step': 151, 'epoch': 1} {'type': 'loss', 'content': 0.023213069885969162, 'timestamp': '2025-09-10 02:16:21.061308', 'step': 152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:16:21.113790', 'step': 152, 'epoch': 1} {'type': 'loss', 'content': 0.02709483541548252, 'timestamp': '2025-09-10 02:16:21.117104', 'step': 153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:21.150904', 'step': 153, 'epoch': 1} {'type': 'loss', 'content': 0.02957731857895851, 'timestamp': '2025-09-10 02:16:21.157774', 'step': 154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:21.192385', 'step': 154, 'epoch': 1} {'type': 'loss', 'content': 0.0102442791685462, 'timestamp': '2025-09-10 02:16:21.204856', 'step': 155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:21.237494', 'step': 155, 'epoch': 1} {'type': 'loss', 'content': 0.026265621185302734, 'timestamp': '2025-09-10 02:16:21.262502', 'step': 156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:16:21.295340', 'step': 156, 'epoch': 1} {'type': 'loss', 'content': 0.022335294634103775, 'timestamp': '2025-09-10 02:16:21.298538', 'step': 157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:21.332105', 'step': 157, 'epoch': 1} {'type': 'loss', 'content': 0.011588959954679012, 'timestamp': '2025-09-10 02:16:21.344331', 'step': 158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:21.377620', 'step': 158, 'epoch': 1} {'type': 'loss', 'content': 0.019390691071748734, 'timestamp': '2025-09-10 02:16:21.390111', 'step': 159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:21.421280', 'step': 159, 'epoch': 1} {'type': 'loss', 'content': 0.02968760021030903, 'timestamp': '2025-09-10 02:16:21.449406', 'step': 160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:21.481678', 'step': 160, 'epoch': 1} {'type': 'loss', 'content': 0.03552708774805069, 'timestamp': '2025-09-10 02:16:21.484727', 'step': 161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:21.517175', 'step': 161, 'epoch': 1} {'type': 'loss', 'content': 0.014869497157633305, 'timestamp': '2025-09-10 02:16:21.522775', 'step': 162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:21.556750', 'step': 162, 'epoch': 1} {'type': 'loss', 'content': 0.022128138691186905, 'timestamp': '2025-09-10 02:16:21.563452', 'step': 163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:21.595358', 'step': 163, 'epoch': 1} {'type': 'loss', 'content': 0.014087623916566372, 'timestamp': '2025-09-10 02:16:21.623091', 'step': 164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:21.655243', 'step': 164, 'epoch': 1} {'type': 'loss', 'content': 0.010876579210162163, 'timestamp': '2025-09-10 02:16:21.659393', 'step': 165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:21.692113', 'step': 165, 'epoch': 1} {'type': 'loss', 'content': 0.02489648386836052, 'timestamp': '2025-09-10 02:16:21.701867', 'step': 166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:21.734164', 'step': 166, 'epoch': 1} {'type': 'loss', 'content': 0.02895858697593212, 'timestamp': '2025-09-10 02:16:21.745496', 'step': 167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:16:21.780855', 'step': 167, 'epoch': 1} {'type': 'loss', 'content': 0.02555564045906067, 'timestamp': '2025-09-10 02:16:21.815551', 'step': 168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:21.847095', 'step': 168, 'epoch': 1} {'type': 'loss', 'content': 0.02748963236808777, 'timestamp': '2025-09-10 02:16:21.859757', 'step': 169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:21.891258', 'step': 169, 'epoch': 1} {'type': 'loss', 'content': 0.013958572410047054, 'timestamp': '2025-09-10 02:16:21.894827', 'step': 170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:21.925187', 'step': 170, 'epoch': 1} {'type': 'loss', 'content': 0.02482200227677822, 'timestamp': '2025-09-10 02:16:21.929660', 'step': 171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:21.960629', 'step': 171, 'epoch': 1} {'type': 'loss', 'content': 0.012857136316597462, 'timestamp': '2025-09-10 02:16:21.994081', 'step': 172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:16:22.026787', 'step': 172, 'epoch': 1} {'type': 'loss', 'content': 0.013918432407081127, 'timestamp': '2025-09-10 02:16:22.039823', 'step': 173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:22.069812', 'step': 173, 'epoch': 1} {'type': 'loss', 'content': 0.020305844023823738, 'timestamp': '2025-09-10 02:16:22.073912', 'step': 174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:22.106095', 'step': 174, 'epoch': 1} {'type': 'loss', 'content': 0.01923571154475212, 'timestamp': '2025-09-10 02:16:22.112980', 'step': 175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:22.145283', 'step': 175, 'epoch': 1} {'type': 'loss', 'content': 0.02769598178565502, 'timestamp': '2025-09-10 02:16:22.177975', 'step': 176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:22.210470', 'step': 176, 'epoch': 1} {'type': 'loss', 'content': 0.01644458808004856, 'timestamp': '2025-09-10 02:16:22.214646', 'step': 177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 18035204324480}, 'timestamp': '2025-09-10 02:16:22.265692', 'step': 177, 'epoch': 1} {'type': 'loss', 'content': 0.02010306902229786, 'timestamp': '2025-09-10 02:16:22.287206', 'step': 178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:22.318980', 'step': 178, 'epoch': 1} {'type': 'loss', 'content': 0.020176881924271584, 'timestamp': '2025-09-10 02:16:22.331133', 'step': 179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:22.361522', 'step': 179, 'epoch': 1} {'type': 'loss', 'content': 0.016681145876646042, 'timestamp': '2025-09-10 02:16:22.386733', 'step': 180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:22.417832', 'step': 180, 'epoch': 1} {'type': 'loss', 'content': 0.021172260865569115, 'timestamp': '2025-09-10 02:16:22.420092', 'step': 181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:16:22.453254', 'step': 181, 'epoch': 1} {'type': 'loss', 'content': 0.028826581314206123, 'timestamp': '2025-09-10 02:16:22.466939', 'step': 182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:16:22.501100', 'step': 182, 'epoch': 1} {'type': 'loss', 'content': 0.023097632452845573, 'timestamp': '2025-09-10 02:16:22.515058', 'step': 183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:22.545590', 'step': 183, 'epoch': 1} {'type': 'loss', 'content': 0.027354659512639046, 'timestamp': '2025-09-10 02:16:22.570952', 'step': 184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:22.600953', 'step': 184, 'epoch': 1} {'type': 'loss', 'content': 0.022371714934706688, 'timestamp': '2025-09-10 02:16:22.605532', 'step': 185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:22.637703', 'step': 185, 'epoch': 1} {'type': 'loss', 'content': 0.02118653617799282, 'timestamp': '2025-09-10 02:16:22.644663', 'step': 186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:22.675209', 'step': 186, 'epoch': 1} {'type': 'loss', 'content': 0.005522268824279308, 'timestamp': '2025-09-10 02:16:22.679864', 'step': 187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:22.710239', 'step': 187, 'epoch': 1} {'type': 'loss', 'content': 0.017090152949094772, 'timestamp': '2025-09-10 02:16:22.741196', 'step': 188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:22.770746', 'step': 188, 'epoch': 1} {'type': 'loss', 'content': 0.02689528279006481, 'timestamp': '2025-09-10 02:16:22.775360', 'step': 189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:22.807716', 'step': 189, 'epoch': 1} {'type': 'loss', 'content': 0.017292974516749382, 'timestamp': '2025-09-10 02:16:22.815250', 'step': 190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:22.845775', 'step': 190, 'epoch': 1} {'type': 'loss', 'content': 0.019936595112085342, 'timestamp': '2025-09-10 02:16:22.852588', 'step': 191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:16:22.886413', 'step': 191, 'epoch': 1} {'type': 'loss', 'content': 0.01834060624241829, 'timestamp': '2025-09-10 02:16:22.921163', 'step': 192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:22.952122', 'step': 192, 'epoch': 1} {'type': 'loss', 'content': 0.009056499227881432, 'timestamp': '2025-09-10 02:16:22.957463', 'step': 193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:22.987267', 'step': 193, 'epoch': 1} {'type': 'loss', 'content': 0.02178066037595272, 'timestamp': '2025-09-10 02:16:22.991642', 'step': 194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:23.024415', 'step': 194, 'epoch': 1} {'type': 'loss', 'content': 0.023802533745765686, 'timestamp': '2025-09-10 02:16:23.030527', 'step': 195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:23.062627', 'step': 195, 'epoch': 1} {'type': 'loss', 'content': 0.02088129334151745, 'timestamp': '2025-09-10 02:16:23.091433', 'step': 196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:23.125721', 'step': 196, 'epoch': 1} {'type': 'loss', 'content': 0.03935558721423149, 'timestamp': '2025-09-10 02:16:23.133504', 'step': 197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:23.169617', 'step': 197, 'epoch': 1} {'type': 'loss', 'content': 0.02025543339550495, 'timestamp': '2025-09-10 02:16:23.176598', 'step': 198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:23.208914', 'step': 198, 'epoch': 1} {'type': 'loss', 'content': 0.012568363919854164, 'timestamp': '2025-09-10 02:16:23.216126', 'step': 199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:23.246371', 'step': 199, 'epoch': 1} {'type': 'loss', 'content': 0.013729465194046497, 'timestamp': '2025-09-10 02:16:23.278223', 'step': 200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:23.309443', 'step': 200, 'epoch': 1} {'type': 'loss', 'content': 0.01759318821132183, 'timestamp': '2025-09-10 02:16:23.313924', 'step': 201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:23.344145', 'step': 201, 'epoch': 1} {'type': 'loss', 'content': 0.007888035848736763, 'timestamp': '2025-09-10 02:16:23.351604', 'step': 202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:23.382940', 'step': 202, 'epoch': 1} {'type': 'loss', 'content': 0.00965458806604147, 'timestamp': '2025-09-10 02:16:23.390314', 'step': 203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:23.423895', 'step': 203, 'epoch': 1} {'type': 'loss', 'content': 0.01958434283733368, 'timestamp': '2025-09-10 02:16:23.452560', 'step': 204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:23.481703', 'step': 204, 'epoch': 1} {'type': 'loss', 'content': 0.007743260823190212, 'timestamp': '2025-09-10 02:16:23.487143', 'step': 205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:23.517951', 'step': 205, 'epoch': 1} {'type': 'loss', 'content': 0.004702796693891287, 'timestamp': '2025-09-10 02:16:23.525371', 'step': 206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:23.555186', 'step': 206, 'epoch': 1} {'type': 'loss', 'content': 0.01660262979567051, 'timestamp': '2025-09-10 02:16:23.562112', 'step': 207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:23.593403', 'step': 207, 'epoch': 1} {'type': 'loss', 'content': 0.01599551923573017, 'timestamp': '2025-09-10 02:16:23.626330', 'step': 208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:23.658169', 'step': 208, 'epoch': 1} {'type': 'loss', 'content': 0.013740134425461292, 'timestamp': '2025-09-10 02:16:23.662707', 'step': 209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:23.693156', 'step': 209, 'epoch': 1} {'type': 'loss', 'content': 0.017071815207600594, 'timestamp': '2025-09-10 02:16:23.696982', 'step': 210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:23.731153', 'step': 210, 'epoch': 1} {'type': 'loss', 'content': 0.021906418725848198, 'timestamp': '2025-09-10 02:16:23.738722', 'step': 211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:23.775191', 'step': 211, 'epoch': 1} {'type': 'loss', 'content': 0.008046785369515419, 'timestamp': '2025-09-10 02:16:23.800487', 'step': 212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:23.830266', 'step': 212, 'epoch': 1} {'type': 'loss', 'content': 0.006279070395976305, 'timestamp': '2025-09-10 02:16:23.832367', 'step': 213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:23.862918', 'step': 213, 'epoch': 1} {'type': 'loss', 'content': 0.025564759969711304, 'timestamp': '2025-09-10 02:16:23.866636', 'step': 214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:23.898046', 'step': 214, 'epoch': 1} {'type': 'loss', 'content': 0.01694483682513237, 'timestamp': '2025-09-10 02:16:23.905447', 'step': 215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:23.938422', 'step': 215, 'epoch': 1} {'type': 'loss', 'content': 0.028261274099349976, 'timestamp': '2025-09-10 02:16:23.966060', 'step': 216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:23.995793', 'step': 216, 'epoch': 1} {'type': 'loss', 'content': 0.008908641524612904, 'timestamp': '2025-09-10 02:16:23.997994', 'step': 217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:24.033911', 'step': 217, 'epoch': 1} {'type': 'loss', 'content': 0.049135930836200714, 'timestamp': '2025-09-10 02:16:24.047306', 'step': 218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:24.077407', 'step': 218, 'epoch': 1} {'type': 'loss', 'content': 0.009086393751204014, 'timestamp': '2025-09-10 02:16:24.085796', 'step': 219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:24.121929', 'step': 219, 'epoch': 1} {'type': 'loss', 'content': 0.01568550243973732, 'timestamp': '2025-09-10 02:16:24.155378', 'step': 220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:16:24.187632', 'step': 220, 'epoch': 1} {'type': 'loss', 'content': 0.011114334687590599, 'timestamp': '2025-09-10 02:16:24.200647', 'step': 221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:24.237581', 'step': 221, 'epoch': 1} {'type': 'loss', 'content': 0.02597637288272381, 'timestamp': '2025-09-10 02:16:24.244807', 'step': 222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:24.278536', 'step': 222, 'epoch': 1} {'type': 'loss', 'content': 0.011223288252949715, 'timestamp': '2025-09-10 02:16:24.285951', 'step': 223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:24.317555', 'step': 223, 'epoch': 1} {'type': 'loss', 'content': 0.010914224199950695, 'timestamp': '2025-09-10 02:16:24.345436', 'step': 224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 15662185694400}, 'timestamp': '2025-09-10 02:16:24.397785', 'step': 224, 'epoch': 1} {'type': 'loss', 'content': 0.009774766862392426, 'timestamp': '2025-09-10 02:16:24.413366', 'step': 225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:24.445516', 'step': 225, 'epoch': 1} {'type': 'loss', 'content': 0.03243091329932213, 'timestamp': '2025-09-10 02:16:24.456124', 'step': 226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:24.487958', 'step': 226, 'epoch': 1} {'type': 'loss', 'content': 0.005026062484830618, 'timestamp': '2025-09-10 02:16:24.498899', 'step': 227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:24.531723', 'step': 227, 'epoch': 1} {'type': 'loss', 'content': 0.017378708347678185, 'timestamp': '2025-09-10 02:16:24.557445', 'step': 228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:24.588636', 'step': 228, 'epoch': 1} {'type': 'loss', 'content': 0.030252641066908836, 'timestamp': '2025-09-10 02:16:24.601298', 'step': 229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:24.633811', 'step': 229, 'epoch': 1} {'type': 'loss', 'content': 0.02279387228190899, 'timestamp': '2025-09-10 02:16:24.646266', 'step': 230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:16:24.683908', 'step': 230, 'epoch': 1} {'type': 'loss', 'content': 0.008336501196026802, 'timestamp': '2025-09-10 02:16:24.699519', 'step': 231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:24.730452', 'step': 231, 'epoch': 1} {'type': 'loss', 'content': 0.016707872971892357, 'timestamp': '2025-09-10 02:16:24.761385', 'step': 232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:24.794054', 'step': 232, 'epoch': 1} {'type': 'loss', 'content': 0.023161133751273155, 'timestamp': '2025-09-10 02:16:24.803510', 'step': 233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:24.839446', 'step': 233, 'epoch': 1} {'type': 'loss', 'content': 0.011448432691395283, 'timestamp': '2025-09-10 02:16:24.851561', 'step': 234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:24.887091', 'step': 234, 'epoch': 1} {'type': 'loss', 'content': 0.034271273761987686, 'timestamp': '2025-09-10 02:16:24.894082', 'step': 235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:24.924893', 'step': 235, 'epoch': 1} {'type': 'loss', 'content': 0.014886337332427502, 'timestamp': '2025-09-10 02:16:24.952841', 'step': 236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:24.991322', 'step': 236, 'epoch': 1} {'type': 'loss', 'content': 0.02193574421107769, 'timestamp': '2025-09-10 02:16:25.010432', 'step': 237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:25.054375', 'step': 237, 'epoch': 1} {'type': 'loss', 'content': 0.009645821526646614, 'timestamp': '2025-09-10 02:16:25.061131', 'step': 238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:25.100555', 'step': 238, 'epoch': 1} {'type': 'loss', 'content': 0.009037821553647518, 'timestamp': '2025-09-10 02:16:25.107882', 'step': 239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:16:25.148957', 'step': 239, 'epoch': 1} {'type': 'loss', 'content': 0.014126168563961983, 'timestamp': '2025-09-10 02:16:25.185418', 'step': 240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:25.218887', 'step': 240, 'epoch': 1} {'type': 'loss', 'content': 0.005452022887766361, 'timestamp': '2025-09-10 02:16:25.224457', 'step': 241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:25.254336', 'step': 241, 'epoch': 1} {'type': 'loss', 'content': 0.03313310071825981, 'timestamp': '2025-09-10 02:16:25.261497', 'step': 242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:25.291853', 'step': 242, 'epoch': 1} {'type': 'loss', 'content': 0.03182428702712059, 'timestamp': '2025-09-10 02:16:25.302738', 'step': 243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:16:25.338675', 'step': 243, 'epoch': 1} {'type': 'loss', 'content': 0.029811818152666092, 'timestamp': '2025-09-10 02:16:25.366539', 'step': 244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:25.397901', 'step': 244, 'epoch': 1} {'type': 'loss', 'content': 0.03838468715548515, 'timestamp': '2025-09-10 02:16:25.402821', 'step': 245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:25.438631', 'step': 245, 'epoch': 1} {'type': 'loss', 'content': 0.02467919886112213, 'timestamp': '2025-09-10 02:16:25.445474', 'step': 246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:25.479124', 'step': 246, 'epoch': 1} {'type': 'loss', 'content': 0.00823969580233097, 'timestamp': '2025-09-10 02:16:25.486640', 'step': 247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:16:25.528750', 'step': 247, 'epoch': 1} {'type': 'loss', 'content': 0.011138495989143848, 'timestamp': '2025-09-10 02:16:25.565512', 'step': 248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:25.596292', 'step': 248, 'epoch': 1} {'type': 'loss', 'content': 0.03702753037214279, 'timestamp': '2025-09-10 02:16:25.601440', 'step': 249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:25.634575', 'step': 249, 'epoch': 1} {'type': 'loss', 'content': 0.007849356159567833, 'timestamp': '2025-09-10 02:16:25.647926', 'step': 250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:25.678453', 'step': 250, 'epoch': 1} {'type': 'loss', 'content': 0.00440265703946352, 'timestamp': '2025-09-10 02:16:25.685668', 'step': 251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:25.715578', 'step': 251, 'epoch': 1} {'type': 'loss', 'content': 0.0027811271138489246, 'timestamp': '2025-09-10 02:16:25.749017', 'step': 252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:25.779626', 'step': 252, 'epoch': 1} {'type': 'loss', 'content': 0.002288882387802005, 'timestamp': '2025-09-10 02:16:25.781615', 'step': 253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:25.811818', 'step': 253, 'epoch': 1} {'type': 'loss', 'content': 0.02566692791879177, 'timestamp': '2025-09-10 02:16:25.819734', 'step': 254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:25.851082', 'step': 254, 'epoch': 1} {'type': 'loss', 'content': 0.016085701063275337, 'timestamp': '2025-09-10 02:16:25.857937', 'step': 255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:25.888181', 'step': 255, 'epoch': 1} {'type': 'loss', 'content': 0.007969672791659832, 'timestamp': '2025-09-10 02:16:25.916936', 'step': 256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:25.948366', 'step': 256, 'epoch': 1} {'type': 'loss', 'content': 0.02336304821074009, 'timestamp': '2025-09-10 02:16:25.953983', 'step': 257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:25.986175', 'step': 257, 'epoch': 1} {'type': 'loss', 'content': 0.010644437745213509, 'timestamp': '2025-09-10 02:16:25.993642', 'step': 258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:26.026399', 'step': 258, 'epoch': 1} {'type': 'loss', 'content': 0.009769896045327187, 'timestamp': '2025-09-10 02:16:26.030862', 'step': 259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:26.063269', 'step': 259, 'epoch': 1} {'type': 'loss', 'content': 0.015422756783664227, 'timestamp': '2025-09-10 02:16:26.091962', 'step': 260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:26.123418', 'step': 260, 'epoch': 1} {'type': 'loss', 'content': 0.031315069645643234, 'timestamp': '2025-09-10 02:16:26.136180', 'step': 261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:26.166267', 'step': 261, 'epoch': 1} {'type': 'loss', 'content': 0.033414822071790695, 'timestamp': '2025-09-10 02:16:26.173215', 'step': 262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:16:26.206363', 'step': 262, 'epoch': 1} {'type': 'loss', 'content': 0.015460536815226078, 'timestamp': '2025-09-10 02:16:26.208832', 'step': 263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:26.240930', 'step': 263, 'epoch': 1} {'type': 'loss', 'content': 0.030592020601034164, 'timestamp': '2025-09-10 02:16:26.272768', 'step': 264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:26.302446', 'step': 264, 'epoch': 1} {'type': 'loss', 'content': 0.01532017719000578, 'timestamp': '2025-09-10 02:16:26.307982', 'step': 265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:26.341058', 'step': 265, 'epoch': 1} {'type': 'loss', 'content': 0.033243995159864426, 'timestamp': '2025-09-10 02:16:26.354418', 'step': 266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:26.387090', 'step': 266, 'epoch': 1} {'type': 'loss', 'content': 0.017378225922584534, 'timestamp': '2025-09-10 02:16:26.394754', 'step': 267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:16:26.424486', 'step': 267, 'epoch': 1} {'type': 'loss', 'content': 0.02742266096174717, 'timestamp': '2025-09-10 02:16:26.448727', 'step': 268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:26.482635', 'step': 268, 'epoch': 1} {'type': 'loss', 'content': 0.04506625607609749, 'timestamp': '2025-09-10 02:16:26.487290', 'step': 269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:26.518809', 'step': 269, 'epoch': 1} {'type': 'loss', 'content': 0.013364973478019238, 'timestamp': '2025-09-10 02:16:26.531381', 'step': 270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:26.562970', 'step': 270, 'epoch': 1} {'type': 'loss', 'content': 0.01263825036585331, 'timestamp': '2025-09-10 02:16:26.569994', 'step': 271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:26.602400', 'step': 271, 'epoch': 1} {'type': 'loss', 'content': 0.03316551819443703, 'timestamp': '2025-09-10 02:16:26.630937', 'step': 272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:26.675334', 'step': 272, 'epoch': 1} {'type': 'loss', 'content': 0.019756343215703964, 'timestamp': '2025-09-10 02:16:26.680035', 'step': 273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:26.715960', 'step': 273, 'epoch': 1} {'type': 'loss', 'content': 0.0236830972135067, 'timestamp': '2025-09-10 02:16:26.723355', 'step': 274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:26.761266', 'step': 274, 'epoch': 1} {'type': 'loss', 'content': 0.00928380899131298, 'timestamp': '2025-09-10 02:16:26.768864', 'step': 275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:26.810695', 'step': 275, 'epoch': 1} {'type': 'loss', 'content': 0.014733054675161839, 'timestamp': '2025-09-10 02:16:26.835676', 'step': 276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:16:26.871465', 'step': 276, 'epoch': 1} {'type': 'loss', 'content': 0.03336886316537857, 'timestamp': '2025-09-10 02:16:26.884467', 'step': 277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:26.916086', 'step': 277, 'epoch': 1} {'type': 'loss', 'content': 0.008176090195775032, 'timestamp': '2025-09-10 02:16:26.922890', 'step': 278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:26.953543', 'step': 278, 'epoch': 1} {'type': 'loss', 'content': 0.010884806513786316, 'timestamp': '2025-09-10 02:16:26.960988', 'step': 279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:26.991008', 'step': 279, 'epoch': 1} {'type': 'loss', 'content': 0.010812760330736637, 'timestamp': '2025-09-10 02:16:27.019416', 'step': 280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:27.052033', 'step': 280, 'epoch': 1} {'type': 'loss', 'content': 0.018427478149533272, 'timestamp': '2025-09-10 02:16:27.061756', 'step': 281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:16:27.091729', 'step': 281, 'epoch': 1} {'type': 'loss', 'content': 0.005787822883576155, 'timestamp': '2025-09-10 02:16:27.094494', 'step': 282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:27.126564', 'step': 282, 'epoch': 1} {'type': 'loss', 'content': 0.026878537610173225, 'timestamp': '2025-09-10 02:16:27.133510', 'step': 283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:27.163712', 'step': 283, 'epoch': 1} {'type': 'loss', 'content': 0.014809337444603443, 'timestamp': '2025-09-10 02:16:27.191924', 'step': 284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:16:27.231470', 'step': 284, 'epoch': 1} {'type': 'loss', 'content': 0.0403473936021328, 'timestamp': '2025-09-10 02:16:27.233854', 'step': 285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:27.264630', 'step': 285, 'epoch': 1} {'type': 'loss', 'content': 0.009624199941754341, 'timestamp': '2025-09-10 02:16:27.271396', 'step': 286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:27.307634', 'step': 286, 'epoch': 1} {'type': 'loss', 'content': 0.00741335516795516, 'timestamp': '2025-09-10 02:16:27.314627', 'step': 287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:27.346502', 'step': 287, 'epoch': 1} {'type': 'loss', 'content': 0.018726302310824394, 'timestamp': '2025-09-10 02:16:27.374123', 'step': 288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:27.404826', 'step': 288, 'epoch': 1} {'type': 'loss', 'content': 0.043964944779872894, 'timestamp': '2025-09-10 02:16:27.412272', 'step': 289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:27.443455', 'step': 289, 'epoch': 1} {'type': 'loss', 'content': 0.0012115959543734789, 'timestamp': '2025-09-10 02:16:27.450533', 'step': 290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:27.482885', 'step': 290, 'epoch': 1} {'type': 'loss', 'content': 0.016456475481390953, 'timestamp': '2025-09-10 02:16:27.495010', 'step': 291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:27.525943', 'step': 291, 'epoch': 1} {'type': 'loss', 'content': 0.0014793974114581943, 'timestamp': '2025-09-10 02:16:27.557732', 'step': 292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:27.588473', 'step': 292, 'epoch': 1} {'type': 'loss', 'content': 0.01667448878288269, 'timestamp': '2025-09-10 02:16:27.596371', 'step': 293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:27.627182', 'step': 293, 'epoch': 1} {'type': 'loss', 'content': 0.013031461276113987, 'timestamp': '2025-09-10 02:16:27.633929', 'step': 294, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:16:38.184699', 'step': 294, 'epoch': 1} {'type': 'pplx', 'content': 15332585.816633547, 'timestamp': '2025-09-10 02:16:38.187323', 'step': 294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:16:38.219791', 'step': 294, 'epoch': 1} {'type': 'loss', 'content': 0.023444533348083496, 'timestamp': '2025-09-10 02:16:38.233473', 'step': 295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:16:38.268307', 'step': 295, 'epoch': 1} {'type': 'loss', 'content': 0.008510954678058624, 'timestamp': '2025-09-10 02:16:38.303183', 'step': 296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:38.335146', 'step': 296, 'epoch': 1} {'type': 'loss', 'content': 0.015145028941333294, 'timestamp': '2025-09-10 02:16:38.339032', 'step': 297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:38.370675', 'step': 297, 'epoch': 1} {'type': 'loss', 'content': 0.0069735231809318066, 'timestamp': '2025-09-10 02:16:38.374348', 'step': 298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:16:38.412173', 'step': 298, 'epoch': 1} {'type': 'loss', 'content': 0.013920980505645275, 'timestamp': '2025-09-10 02:16:38.427806', 'step': 299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:16:38.460108', 'step': 299, 'epoch': 1} {'type': 'loss', 'content': 0.04089200869202614, 'timestamp': '2025-09-10 02:16:38.485588', 'step': 300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:38.520153', 'step': 300, 'epoch': 1} {'type': 'loss', 'content': 0.013281070627272129, 'timestamp': '2025-09-10 02:16:38.523981', 'step': 301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:38.558005', 'step': 301, 'epoch': 1} {'type': 'loss', 'content': 0.03492172807455063, 'timestamp': '2025-09-10 02:16:38.567580', 'step': 302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:38.600149', 'step': 302, 'epoch': 1} {'type': 'loss', 'content': 0.03769215941429138, 'timestamp': '2025-09-10 02:16:38.609626', 'step': 303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:38.641634', 'step': 303, 'epoch': 1} {'type': 'loss', 'content': 0.010984980501234531, 'timestamp': '2025-09-10 02:16:38.669717', 'step': 304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:38.699933', 'step': 304, 'epoch': 1} {'type': 'loss', 'content': 0.016611166298389435, 'timestamp': '2025-09-10 02:16:38.707452', 'step': 305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:38.739406', 'step': 305, 'epoch': 1} {'type': 'loss', 'content': 0.013240032829344273, 'timestamp': '2025-09-10 02:16:38.750082', 'step': 306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:38.780171', 'step': 306, 'epoch': 1} {'type': 'loss', 'content': 0.01312293391674757, 'timestamp': '2025-09-10 02:16:38.792673', 'step': 307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:38.823157', 'step': 307, 'epoch': 1} {'type': 'loss', 'content': 0.01570984721183777, 'timestamp': '2025-09-10 02:16:38.851813', 'step': 308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:38.882826', 'step': 308, 'epoch': 1} {'type': 'loss', 'content': 0.011351371183991432, 'timestamp': '2025-09-10 02:16:38.887319', 'step': 309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:38.916806', 'step': 309, 'epoch': 1} {'type': 'loss', 'content': 0.006130642257630825, 'timestamp': '2025-09-10 02:16:38.920935', 'step': 310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:38.952461', 'step': 310, 'epoch': 1} {'type': 'loss', 'content': 0.02042931318283081, 'timestamp': '2025-09-10 02:16:38.959345', 'step': 311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:16:38.994181', 'step': 311, 'epoch': 1} {'type': 'loss', 'content': 0.008902345784008503, 'timestamp': '2025-09-10 02:16:39.029134', 'step': 312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:16:39.062647', 'step': 312, 'epoch': 1} {'type': 'loss', 'content': 0.02012813650071621, 'timestamp': '2025-09-10 02:16:39.075585', 'step': 313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:39.106912', 'step': 313, 'epoch': 1} {'type': 'loss', 'content': 0.022141067311167717, 'timestamp': '2025-09-10 02:16:39.118577', 'step': 314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:39.151698', 'step': 314, 'epoch': 1} {'type': 'loss', 'content': 0.03623204678297043, 'timestamp': '2025-09-10 02:16:39.154698', 'step': 315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:39.185998', 'step': 315, 'epoch': 1} {'type': 'loss', 'content': 0.0135353934019804, 'timestamp': '2025-09-10 02:16:39.213392', 'step': 316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:39.243549', 'step': 316, 'epoch': 1} {'type': 'loss', 'content': 0.028507256880402565, 'timestamp': '2025-09-10 02:16:39.248758', 'step': 317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:39.279221', 'step': 317, 'epoch': 1} {'type': 'loss', 'content': 0.029799891635775566, 'timestamp': '2025-09-10 02:16:39.290132', 'step': 318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:39.319964', 'step': 318, 'epoch': 1} {'type': 'loss', 'content': 0.02186533249914646, 'timestamp': '2025-09-10 02:16:39.326712', 'step': 319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:39.356706', 'step': 319, 'epoch': 1} {'type': 'loss', 'content': 0.014612867496907711, 'timestamp': '2025-09-10 02:16:39.387845', 'step': 320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:16:39.425403', 'step': 320, 'epoch': 1} {'type': 'loss', 'content': 0.012654599733650684, 'timestamp': '2025-09-10 02:16:39.440561', 'step': 321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:39.474992', 'step': 321, 'epoch': 1} {'type': 'loss', 'content': 0.021614069119095802, 'timestamp': '2025-09-10 02:16:39.482465', 'step': 322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:39.513753', 'step': 322, 'epoch': 1} {'type': 'loss', 'content': 0.012967349961400032, 'timestamp': '2025-09-10 02:16:39.526078', 'step': 323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:39.560156', 'step': 323, 'epoch': 1} {'type': 'loss', 'content': 0.024872979149222374, 'timestamp': '2025-09-10 02:16:39.588086', 'step': 324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:39.619750', 'step': 324, 'epoch': 1} {'type': 'loss', 'content': 0.026342902332544327, 'timestamp': '2025-09-10 02:16:39.624580', 'step': 325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:39.654950', 'step': 325, 'epoch': 1} {'type': 'loss', 'content': 0.006380206905305386, 'timestamp': '2025-09-10 02:16:39.659454', 'step': 326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:39.689823', 'step': 326, 'epoch': 1} {'type': 'loss', 'content': 0.014897564426064491, 'timestamp': '2025-09-10 02:16:39.694202', 'step': 327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:39.724802', 'step': 327, 'epoch': 1} {'type': 'loss', 'content': 0.02040562406182289, 'timestamp': '2025-09-10 02:16:39.755826', 'step': 328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:39.787423', 'step': 328, 'epoch': 1} {'type': 'loss', 'content': 0.015893712639808655, 'timestamp': '2025-09-10 02:16:39.789419', 'step': 329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:39.819626', 'step': 329, 'epoch': 1} {'type': 'loss', 'content': 0.03249195218086243, 'timestamp': '2025-09-10 02:16:39.826538', 'step': 330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:39.859763', 'step': 330, 'epoch': 1} {'type': 'loss', 'content': 0.025817295536398888, 'timestamp': '2025-09-10 02:16:39.866827', 'step': 331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:39.897181', 'step': 331, 'epoch': 1} {'type': 'loss', 'content': 0.030507784336805344, 'timestamp': '2025-09-10 02:16:39.928222', 'step': 332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:39.958589', 'step': 332, 'epoch': 1} {'type': 'loss', 'content': 0.04506632685661316, 'timestamp': '2025-09-10 02:16:39.964162', 'step': 333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:39.994622', 'step': 333, 'epoch': 1} {'type': 'loss', 'content': 0.009867721237242222, 'timestamp': '2025-09-10 02:16:40.001420', 'step': 334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:40.031806', 'step': 334, 'epoch': 1} {'type': 'loss', 'content': 0.03285963833332062, 'timestamp': '2025-09-10 02:16:40.038877', 'step': 335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:40.068180', 'step': 335, 'epoch': 1} {'type': 'loss', 'content': 0.004922616295516491, 'timestamp': '2025-09-10 02:16:40.096072', 'step': 336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:40.125555', 'step': 336, 'epoch': 1} {'type': 'loss', 'content': 0.011179156601428986, 'timestamp': '2025-09-10 02:16:40.127649', 'step': 337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:40.157666', 'step': 337, 'epoch': 1} {'type': 'loss', 'content': 0.011313307099044323, 'timestamp': '2025-09-10 02:16:40.161788', 'step': 338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:40.191735', 'step': 338, 'epoch': 1} {'type': 'loss', 'content': 0.008836266584694386, 'timestamp': '2025-09-10 02:16:40.196410', 'step': 339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:40.226239', 'step': 339, 'epoch': 1} {'type': 'loss', 'content': 0.021027730777859688, 'timestamp': '2025-09-10 02:16:40.251163', 'step': 340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:40.282459', 'step': 340, 'epoch': 1} {'type': 'loss', 'content': 0.020776817575097084, 'timestamp': '2025-09-10 02:16:40.289283', 'step': 341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:16:40.322361', 'step': 341, 'epoch': 1} {'type': 'loss', 'content': 0.013467268086969852, 'timestamp': '2025-09-10 02:16:40.325962', 'step': 342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:40.358346', 'step': 342, 'epoch': 1} {'type': 'loss', 'content': 0.0017702631885185838, 'timestamp': '2025-09-10 02:16:40.365035', 'step': 343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:40.395803', 'step': 343, 'epoch': 1} {'type': 'loss', 'content': 0.01885797269642353, 'timestamp': '2025-09-10 02:16:40.424139', 'step': 344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:40.455425', 'step': 344, 'epoch': 1} {'type': 'loss', 'content': 0.011524977162480354, 'timestamp': '2025-09-10 02:16:40.465019', 'step': 345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:40.496082', 'step': 345, 'epoch': 1} {'type': 'loss', 'content': 0.020894749090075493, 'timestamp': '2025-09-10 02:16:40.500138', 'step': 346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:40.533156', 'step': 346, 'epoch': 1} {'type': 'loss', 'content': 0.022029070183634758, 'timestamp': '2025-09-10 02:16:40.543987', 'step': 347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:40.574403', 'step': 347, 'epoch': 1} {'type': 'loss', 'content': 0.018113840371370316, 'timestamp': '2025-09-10 02:16:40.602678', 'step': 348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:40.632904', 'step': 348, 'epoch': 1} {'type': 'loss', 'content': 0.030301451683044434, 'timestamp': '2025-09-10 02:16:40.637626', 'step': 349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:40.667865', 'step': 349, 'epoch': 1} {'type': 'loss', 'content': 0.008944302797317505, 'timestamp': '2025-09-10 02:16:40.674887', 'step': 350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:40.706075', 'step': 350, 'epoch': 1} {'type': 'loss', 'content': 0.015170658007264137, 'timestamp': '2025-09-10 02:16:40.716939', 'step': 351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:40.747818', 'step': 351, 'epoch': 1} {'type': 'loss', 'content': 0.004507328849285841, 'timestamp': '2025-09-10 02:16:40.775419', 'step': 352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:40.805563', 'step': 352, 'epoch': 1} {'type': 'loss', 'content': 0.03259176388382912, 'timestamp': '2025-09-10 02:16:40.810294', 'step': 353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:40.840532', 'step': 353, 'epoch': 1} {'type': 'loss', 'content': 0.019336406141519547, 'timestamp': '2025-09-10 02:16:40.848235', 'step': 354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:40.879018', 'step': 354, 'epoch': 1} {'type': 'loss', 'content': 0.009617815725505352, 'timestamp': '2025-09-10 02:16:40.891221', 'step': 355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:40.924595', 'step': 355, 'epoch': 1} {'type': 'loss', 'content': 0.01103215478360653, 'timestamp': '2025-09-10 02:16:40.958887', 'step': 356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:40.992068', 'step': 356, 'epoch': 1} {'type': 'loss', 'content': 0.004833351355046034, 'timestamp': '2025-09-10 02:16:41.000528', 'step': 357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 19459015502528}, 'timestamp': '2025-09-10 02:16:41.061147', 'step': 357, 'epoch': 1} {'type': 'loss', 'content': 0.029006347060203552, 'timestamp': '2025-09-10 02:16:41.084566', 'step': 358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:41.115146', 'step': 358, 'epoch': 1} {'type': 'loss', 'content': 0.0070088389329612255, 'timestamp': '2025-09-10 02:16:41.122685', 'step': 359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:41.153083', 'step': 359, 'epoch': 1} {'type': 'loss', 'content': 0.005866586230695248, 'timestamp': '2025-09-10 02:16:41.180827', 'step': 360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:41.211081', 'step': 360, 'epoch': 1} {'type': 'loss', 'content': 0.005960374139249325, 'timestamp': '2025-09-10 02:16:41.216464', 'step': 361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:41.247340', 'step': 361, 'epoch': 1} {'type': 'loss', 'content': 0.020136630162596703, 'timestamp': '2025-09-10 02:16:41.253990', 'step': 362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:41.285009', 'step': 362, 'epoch': 1} {'type': 'loss', 'content': 0.019610974937677383, 'timestamp': '2025-09-10 02:16:41.291838', 'step': 363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:41.322911', 'step': 363, 'epoch': 1} {'type': 'loss', 'content': 0.008913476951420307, 'timestamp': '2025-09-10 02:16:41.356086', 'step': 364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:41.387239', 'step': 364, 'epoch': 1} {'type': 'loss', 'content': 0.011225526221096516, 'timestamp': '2025-09-10 02:16:41.392356', 'step': 365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:41.423802', 'step': 365, 'epoch': 1} {'type': 'loss', 'content': 0.006913323421031237, 'timestamp': '2025-09-10 02:16:41.431320', 'step': 366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:41.461790', 'step': 366, 'epoch': 1} {'type': 'loss', 'content': 0.009624729864299297, 'timestamp': '2025-09-10 02:16:41.469080', 'step': 367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:41.499416', 'step': 367, 'epoch': 1} {'type': 'loss', 'content': 0.025887373834848404, 'timestamp': '2025-09-10 02:16:41.527240', 'step': 368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:41.558343', 'step': 368, 'epoch': 1} {'type': 'loss', 'content': 0.008098089136183262, 'timestamp': '2025-09-10 02:16:41.562878', 'step': 369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:41.593446', 'step': 369, 'epoch': 1} {'type': 'loss', 'content': 0.006331682205200195, 'timestamp': '2025-09-10 02:16:41.600508', 'step': 370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:41.630146', 'step': 370, 'epoch': 1} {'type': 'loss', 'content': 0.006118200719356537, 'timestamp': '2025-09-10 02:16:41.634316', 'step': 371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:41.665242', 'step': 371, 'epoch': 1} {'type': 'loss', 'content': 0.008842705748975277, 'timestamp': '2025-09-10 02:16:41.696961', 'step': 372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:41.727842', 'step': 372, 'epoch': 1} {'type': 'loss', 'content': 0.0035271942615509033, 'timestamp': '2025-09-10 02:16:41.732390', 'step': 373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:41.762973', 'step': 373, 'epoch': 1} {'type': 'loss', 'content': 0.03405757620930672, 'timestamp': '2025-09-10 02:16:41.770190', 'step': 374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:41.800748', 'step': 374, 'epoch': 1} {'type': 'loss', 'content': 0.011618994176387787, 'timestamp': '2025-09-10 02:16:41.807526', 'step': 375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:41.838416', 'step': 375, 'epoch': 1} {'type': 'loss', 'content': 0.02129237912595272, 'timestamp': '2025-09-10 02:16:41.871884', 'step': 376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 15187581968384}, 'timestamp': '2025-09-10 02:16:41.911666', 'step': 376, 'epoch': 1} {'type': 'loss', 'content': 0.010881869122385979, 'timestamp': '2025-09-10 02:16:41.929030', 'step': 377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:41.962237', 'step': 377, 'epoch': 1} {'type': 'loss', 'content': 0.043674368411302567, 'timestamp': '2025-09-10 02:16:41.969644', 'step': 378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:42.001605', 'step': 378, 'epoch': 1} {'type': 'loss', 'content': 0.026635179296135902, 'timestamp': '2025-09-10 02:16:42.007745', 'step': 379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:42.038844', 'step': 379, 'epoch': 1} {'type': 'loss', 'content': 0.020435309037566185, 'timestamp': '2025-09-10 02:16:42.066678', 'step': 380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:42.096798', 'step': 380, 'epoch': 1} {'type': 'loss', 'content': 0.00813285168260336, 'timestamp': '2025-09-10 02:16:42.101285', 'step': 381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:42.131500', 'step': 381, 'epoch': 1} {'type': 'loss', 'content': 0.002828313270583749, 'timestamp': '2025-09-10 02:16:42.138351', 'step': 382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:42.168544', 'step': 382, 'epoch': 1} {'type': 'loss', 'content': 0.009776918217539787, 'timestamp': '2025-09-10 02:16:42.175390', 'step': 383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:42.205969', 'step': 383, 'epoch': 1} {'type': 'loss', 'content': 0.005225719418376684, 'timestamp': '2025-09-10 02:16:42.237096', 'step': 384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:42.268769', 'step': 384, 'epoch': 1} {'type': 'loss', 'content': 0.01830691285431385, 'timestamp': '2025-09-10 02:16:42.281411', 'step': 385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:42.311923', 'step': 385, 'epoch': 1} {'type': 'loss', 'content': 0.03338460996747017, 'timestamp': '2025-09-10 02:16:42.318762', 'step': 386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:42.352378', 'step': 386, 'epoch': 1} {'type': 'loss', 'content': 0.009636408649384975, 'timestamp': '2025-09-10 02:16:42.356616', 'step': 387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:16:42.392495', 'step': 387, 'epoch': 1} {'type': 'loss', 'content': 0.011447208002209663, 'timestamp': '2025-09-10 02:16:42.427109', 'step': 388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:42.458611', 'step': 388, 'epoch': 1} {'type': 'loss', 'content': 0.008088217116892338, 'timestamp': '2025-09-10 02:16:42.463326', 'step': 389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:42.496371', 'step': 389, 'epoch': 1} {'type': 'loss', 'content': 0.015474558807909489, 'timestamp': '2025-09-10 02:16:42.500585', 'step': 390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:42.531277', 'step': 390, 'epoch': 1} {'type': 'loss', 'content': 0.02543746307492256, 'timestamp': '2025-09-10 02:16:42.542317', 'step': 391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:42.573162', 'step': 391, 'epoch': 1} {'type': 'loss', 'content': 0.025561505928635597, 'timestamp': '2025-09-10 02:16:42.601815', 'step': 392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:42.632716', 'step': 392, 'epoch': 1} {'type': 'loss', 'content': 0.03465661779046059, 'timestamp': '2025-09-10 02:16:42.640158', 'step': 393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:42.671412', 'step': 393, 'epoch': 1} {'type': 'loss', 'content': 0.01359565556049347, 'timestamp': '2025-09-10 02:16:42.680964', 'step': 394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:42.711581', 'step': 394, 'epoch': 1} {'type': 'loss', 'content': 0.007907412014901638, 'timestamp': '2025-09-10 02:16:42.718717', 'step': 395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:42.750176', 'step': 395, 'epoch': 1} {'type': 'loss', 'content': 0.0071455794386565685, 'timestamp': '2025-09-10 02:16:42.783294', 'step': 396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:42.816573', 'step': 396, 'epoch': 1} {'type': 'loss', 'content': 0.0031619654037058353, 'timestamp': '2025-09-10 02:16:42.820323', 'step': 397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:42.851829', 'step': 397, 'epoch': 1} {'type': 'loss', 'content': 0.004171676468104124, 'timestamp': '2025-09-10 02:16:42.858972', 'step': 398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:42.890758', 'step': 398, 'epoch': 1} {'type': 'loss', 'content': 0.029826102778315544, 'timestamp': '2025-09-10 02:16:42.897555', 'step': 399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:42.928308', 'step': 399, 'epoch': 1} {'type': 'loss', 'content': 0.01927870139479637, 'timestamp': '2025-09-10 02:16:42.953538', 'step': 400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:42.985120', 'step': 400, 'epoch': 1} {'type': 'loss', 'content': 0.021446945145726204, 'timestamp': '2025-09-10 02:16:42.989056', 'step': 401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:43.022395', 'step': 401, 'epoch': 1} {'type': 'loss', 'content': 0.007334953639656305, 'timestamp': '2025-09-10 02:16:43.031856', 'step': 402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:43.064842', 'step': 402, 'epoch': 1} {'type': 'loss', 'content': 0.01580173708498478, 'timestamp': '2025-09-10 02:16:43.076947', 'step': 403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:43.108486', 'step': 403, 'epoch': 1} {'type': 'loss', 'content': 0.004910886753350496, 'timestamp': '2025-09-10 02:16:43.133400', 'step': 404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:43.165557', 'step': 404, 'epoch': 1} {'type': 'loss', 'content': 0.016585027799010277, 'timestamp': '2025-09-10 02:16:43.169480', 'step': 405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:43.201730', 'step': 405, 'epoch': 1} {'type': 'loss', 'content': 0.025732260197401047, 'timestamp': '2025-09-10 02:16:43.208429', 'step': 406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:43.239305', 'step': 406, 'epoch': 1} {'type': 'loss', 'content': 0.017954887822270393, 'timestamp': '2025-09-10 02:16:43.246796', 'step': 407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:43.277735', 'step': 407, 'epoch': 1} {'type': 'loss', 'content': 0.004366433713585138, 'timestamp': '2025-09-10 02:16:43.310485', 'step': 408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:43.342601', 'step': 408, 'epoch': 1} {'type': 'loss', 'content': 0.02379104681313038, 'timestamp': '2025-09-10 02:16:43.352185', 'step': 409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:43.382988', 'step': 409, 'epoch': 1} {'type': 'loss', 'content': 0.02213932015001774, 'timestamp': '2025-09-10 02:16:43.390277', 'step': 410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:43.421169', 'step': 410, 'epoch': 1} {'type': 'loss', 'content': 0.03638289123773575, 'timestamp': '2025-09-10 02:16:43.427925', 'step': 411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:43.459158', 'step': 411, 'epoch': 1} {'type': 'loss', 'content': 0.004115985240787268, 'timestamp': '2025-09-10 02:16:43.486953', 'step': 412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:43.521004', 'step': 412, 'epoch': 1} {'type': 'loss', 'content': 0.028757499530911446, 'timestamp': '2025-09-10 02:16:43.523691', 'step': 413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:43.556976', 'step': 413, 'epoch': 1} {'type': 'loss', 'content': 0.02367253229022026, 'timestamp': '2025-09-10 02:16:43.562502', 'step': 414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:43.595650', 'step': 414, 'epoch': 1} {'type': 'loss', 'content': 0.015732292085886, 'timestamp': '2025-09-10 02:16:43.601340', 'step': 415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:43.641354', 'step': 415, 'epoch': 1} {'type': 'loss', 'content': 0.007087147329002619, 'timestamp': '2025-09-10 02:16:43.670473', 'step': 416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:43.710572', 'step': 416, 'epoch': 1} {'type': 'loss', 'content': 0.017834067344665527, 'timestamp': '2025-09-10 02:16:43.716218', 'step': 417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:43.755943', 'step': 417, 'epoch': 1} {'type': 'loss', 'content': 0.01305320393294096, 'timestamp': '2025-09-10 02:16:43.762054', 'step': 418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:43.799543', 'step': 418, 'epoch': 1} {'type': 'loss', 'content': 0.0279587022960186, 'timestamp': '2025-09-10 02:16:43.808058', 'step': 419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:43.841203', 'step': 419, 'epoch': 1} {'type': 'loss', 'content': 0.04414095729589462, 'timestamp': '2025-09-10 02:16:43.871907', 'step': 420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:43.902653', 'step': 420, 'epoch': 1} {'type': 'loss', 'content': 0.037644851952791214, 'timestamp': '2025-09-10 02:16:43.910969', 'step': 421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:43.940973', 'step': 421, 'epoch': 1} {'type': 'loss', 'content': 0.019706133753061295, 'timestamp': '2025-09-10 02:16:43.951822', 'step': 422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:43.983401', 'step': 422, 'epoch': 1} {'type': 'loss', 'content': 0.011109764687716961, 'timestamp': '2025-09-10 02:16:43.993594', 'step': 423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:44.024238', 'step': 423, 'epoch': 1} {'type': 'loss', 'content': 0.01477269921451807, 'timestamp': '2025-09-10 02:16:44.052407', 'step': 424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:44.082738', 'step': 424, 'epoch': 1} {'type': 'loss', 'content': 0.005930650979280472, 'timestamp': '2025-09-10 02:16:44.087197', 'step': 425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:44.116986', 'step': 425, 'epoch': 1} {'type': 'loss', 'content': 0.011241083033382893, 'timestamp': '2025-09-10 02:16:44.124430', 'step': 426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:44.154303', 'step': 426, 'epoch': 1} {'type': 'loss', 'content': 0.01913139782845974, 'timestamp': '2025-09-10 02:16:44.158484', 'step': 427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:44.189053', 'step': 427, 'epoch': 1} {'type': 'loss', 'content': 0.013194134458899498, 'timestamp': '2025-09-10 02:16:44.220251', 'step': 428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:44.250583', 'step': 428, 'epoch': 1} {'type': 'loss', 'content': 0.004168748389929533, 'timestamp': '2025-09-10 02:16:44.259190', 'step': 429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:44.290390', 'step': 429, 'epoch': 1} {'type': 'loss', 'content': 0.004601773340255022, 'timestamp': '2025-09-10 02:16:44.300694', 'step': 430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:44.331481', 'step': 430, 'epoch': 1} {'type': 'loss', 'content': 0.011017811484634876, 'timestamp': '2025-09-10 02:16:44.343647', 'step': 431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:16:44.385862', 'step': 431, 'epoch': 1} {'type': 'loss', 'content': 0.014921742491424084, 'timestamp': '2025-09-10 02:16:44.409528', 'step': 432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:44.440096', 'step': 432, 'epoch': 1} {'type': 'loss', 'content': 0.03164242208003998, 'timestamp': '2025-09-10 02:16:44.444692', 'step': 433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:44.475580', 'step': 433, 'epoch': 1} {'type': 'loss', 'content': 0.028035728260874748, 'timestamp': '2025-09-10 02:16:44.482514', 'step': 434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:44.513112', 'step': 434, 'epoch': 1} {'type': 'loss', 'content': 0.02136605978012085, 'timestamp': '2025-09-10 02:16:44.520589', 'step': 435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:44.551120', 'step': 435, 'epoch': 1} {'type': 'loss', 'content': 0.007327110972255468, 'timestamp': '2025-09-10 02:16:44.579190', 'step': 436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:44.609394', 'step': 436, 'epoch': 1} {'type': 'loss', 'content': 0.010495754890143871, 'timestamp': '2025-09-10 02:16:44.619224', 'step': 437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:44.652859', 'step': 437, 'epoch': 1} {'type': 'loss', 'content': 0.048316050320863724, 'timestamp': '2025-09-10 02:16:44.666223', 'step': 438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:44.697552', 'step': 438, 'epoch': 1} {'type': 'loss', 'content': 0.01725853607058525, 'timestamp': '2025-09-10 02:16:44.704625', 'step': 439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:44.735631', 'step': 439, 'epoch': 1} {'type': 'loss', 'content': 0.018758054822683334, 'timestamp': '2025-09-10 02:16:44.764189', 'step': 440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:44.795294', 'step': 440, 'epoch': 1} {'type': 'loss', 'content': 0.007198534905910492, 'timestamp': '2025-09-10 02:16:44.800320', 'step': 441, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:16:54.824750', 'step': 441, 'epoch': 1} {'type': 'pplx', 'content': 15501530.366672913, 'timestamp': '2025-09-10 02:16:54.827696', 'step': 441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:54.858250', 'step': 441, 'epoch': 1} {'type': 'loss', 'content': 0.01051324512809515, 'timestamp': '2025-09-10 02:16:54.862020', 'step': 442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:54.895486', 'step': 442, 'epoch': 1} {'type': 'loss', 'content': 0.01905696466565132, 'timestamp': '2025-09-10 02:16:54.899808', 'step': 443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:54.931097', 'step': 443, 'epoch': 1} {'type': 'loss', 'content': 0.013812151737511158, 'timestamp': '2025-09-10 02:16:54.956332', 'step': 444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:16:54.998895', 'step': 444, 'epoch': 1} {'type': 'loss', 'content': 0.023278802633285522, 'timestamp': '2025-09-10 02:16:55.012229', 'step': 445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:55.043104', 'step': 445, 'epoch': 1} {'type': 'loss', 'content': 0.027200039476156235, 'timestamp': '2025-09-10 02:16:55.047443', 'step': 446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:55.077695', 'step': 446, 'epoch': 1} {'type': 'loss', 'content': 0.022835474461317062, 'timestamp': '2025-09-10 02:16:55.083632', 'step': 447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:55.115485', 'step': 447, 'epoch': 1} {'type': 'loss', 'content': 0.00839492492377758, 'timestamp': '2025-09-10 02:16:55.149070', 'step': 448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:55.179000', 'step': 448, 'epoch': 1} {'type': 'loss', 'content': 0.026511041447520256, 'timestamp': '2025-09-10 02:16:55.187400', 'step': 449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:55.223189', 'step': 449, 'epoch': 1} {'type': 'loss', 'content': 0.015092065557837486, 'timestamp': '2025-09-10 02:16:55.230247', 'step': 450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:55.259934', 'step': 450, 'epoch': 1} {'type': 'loss', 'content': 0.017918048426508904, 'timestamp': '2025-09-10 02:16:55.267050', 'step': 451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:55.297124', 'step': 451, 'epoch': 1} {'type': 'loss', 'content': 0.017638269811868668, 'timestamp': '2025-09-10 02:16:55.322545', 'step': 452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:55.351949', 'step': 452, 'epoch': 1} {'type': 'loss', 'content': 0.012330381199717522, 'timestamp': '2025-09-10 02:16:55.354182', 'step': 453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:16:55.384704', 'step': 453, 'epoch': 1} {'type': 'loss', 'content': 0.011708649806678295, 'timestamp': '2025-09-10 02:16:55.397277', 'step': 454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:55.429707', 'step': 454, 'epoch': 1} {'type': 'loss', 'content': 0.02566845901310444, 'timestamp': '2025-09-10 02:16:55.440071', 'step': 455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:55.470922', 'step': 455, 'epoch': 1} {'type': 'loss', 'content': 0.0025598767679184675, 'timestamp': '2025-09-10 02:16:55.495870', 'step': 456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:55.531353', 'step': 456, 'epoch': 1} {'type': 'loss', 'content': 0.012102210894227028, 'timestamp': '2025-09-10 02:16:55.537883', 'step': 457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:55.569169', 'step': 457, 'epoch': 1} {'type': 'loss', 'content': 0.009209878742694855, 'timestamp': '2025-09-10 02:16:55.573634', 'step': 458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:55.607979', 'step': 458, 'epoch': 1} {'type': 'loss', 'content': 0.009584350511431694, 'timestamp': '2025-09-10 02:16:55.615763', 'step': 459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:55.651621', 'step': 459, 'epoch': 1} {'type': 'loss', 'content': 0.016596131026744843, 'timestamp': '2025-09-10 02:16:55.679599', 'step': 460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:55.717580', 'step': 460, 'epoch': 1} {'type': 'loss', 'content': 0.02308899164199829, 'timestamp': '2025-09-10 02:16:55.719799', 'step': 461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:55.749684', 'step': 461, 'epoch': 1} {'type': 'loss', 'content': 0.016699977219104767, 'timestamp': '2025-09-10 02:16:55.754325', 'step': 462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:16:55.787359', 'step': 462, 'epoch': 1} {'type': 'loss', 'content': 0.02096674218773842, 'timestamp': '2025-09-10 02:16:55.800723', 'step': 463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:16:55.831583', 'step': 463, 'epoch': 1} {'type': 'loss', 'content': 0.0141488928347826, 'timestamp': '2025-09-10 02:16:55.864678', 'step': 464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:55.899549', 'step': 464, 'epoch': 1} {'type': 'loss', 'content': 0.02313445508480072, 'timestamp': '2025-09-10 02:16:55.904295', 'step': 465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:55.940946', 'step': 465, 'epoch': 1} {'type': 'loss', 'content': 0.005347964819520712, 'timestamp': '2025-09-10 02:16:55.947951', 'step': 466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:55.982414', 'step': 466, 'epoch': 1} {'type': 'loss', 'content': 0.019470447674393654, 'timestamp': '2025-09-10 02:16:55.989966', 'step': 467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:16:56.020181', 'step': 467, 'epoch': 1} {'type': 'loss', 'content': 0.01922597922384739, 'timestamp': '2025-09-10 02:16:56.052020', 'step': 468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:56.082140', 'step': 468, 'epoch': 1} {'type': 'loss', 'content': 0.02592761255800724, 'timestamp': '2025-09-10 02:16:56.089958', 'step': 469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:56.120311', 'step': 469, 'epoch': 1} {'type': 'loss', 'content': 0.022186074405908585, 'timestamp': '2025-09-10 02:16:56.127526', 'step': 470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:56.158541', 'step': 470, 'epoch': 1} {'type': 'loss', 'content': 0.022316042333841324, 'timestamp': '2025-09-10 02:16:56.168674', 'step': 471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:16:56.198880', 'step': 471, 'epoch': 1} {'type': 'loss', 'content': 0.007065699901431799, 'timestamp': '2025-09-10 02:16:56.222178', 'step': 472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:56.259536', 'step': 472, 'epoch': 1} {'type': 'loss', 'content': 0.016231011599302292, 'timestamp': '2025-09-10 02:16:56.264118', 'step': 473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:56.294749', 'step': 473, 'epoch': 1} {'type': 'loss', 'content': 0.024839241057634354, 'timestamp': '2025-09-10 02:16:56.302103', 'step': 474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:56.332452', 'step': 474, 'epoch': 1} {'type': 'loss', 'content': 0.04020370915532112, 'timestamp': '2025-09-10 02:16:56.336544', 'step': 475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:56.368297', 'step': 475, 'epoch': 1} {'type': 'loss', 'content': 0.017098741605877876, 'timestamp': '2025-09-10 02:16:56.396144', 'step': 476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:56.429707', 'step': 476, 'epoch': 1} {'type': 'loss', 'content': 0.014849187806248665, 'timestamp': '2025-09-10 02:16:56.436676', 'step': 477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:56.475817', 'step': 477, 'epoch': 1} {'type': 'loss', 'content': 0.019702225923538208, 'timestamp': '2025-09-10 02:16:56.482832', 'step': 478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:56.514808', 'step': 478, 'epoch': 1} {'type': 'loss', 'content': 0.027971146628260612, 'timestamp': '2025-09-10 02:16:56.524882', 'step': 479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:56.556254', 'step': 479, 'epoch': 1} {'type': 'loss', 'content': 0.00662533612921834, 'timestamp': '2025-09-10 02:16:56.581638', 'step': 480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:16:56.612469', 'step': 480, 'epoch': 1} {'type': 'loss', 'content': 0.017041940242052078, 'timestamp': '2025-09-10 02:16:56.618341', 'step': 481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:56.650763', 'step': 481, 'epoch': 1} {'type': 'loss', 'content': 0.0072524151764810085, 'timestamp': '2025-09-10 02:16:56.658474', 'step': 482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:16:56.691076', 'step': 482, 'epoch': 1} {'type': 'loss', 'content': 0.01302304957062006, 'timestamp': '2025-09-10 02:16:56.698078', 'step': 483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:16:56.733662', 'step': 483, 'epoch': 1} {'type': 'loss', 'content': 0.008182940073311329, 'timestamp': '2025-09-10 02:16:56.764429', 'step': 484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:56.795179', 'step': 484, 'epoch': 1} {'type': 'loss', 'content': 0.02642636187374592, 'timestamp': '2025-09-10 02:16:56.797355', 'step': 485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:16:56.836091', 'step': 485, 'epoch': 1} {'type': 'loss', 'content': 0.025703372433781624, 'timestamp': '2025-09-10 02:16:56.851817', 'step': 486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:56.882364', 'step': 486, 'epoch': 1} {'type': 'loss', 'content': 0.006586894392967224, 'timestamp': '2025-09-10 02:16:56.889256', 'step': 487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:56.920015', 'step': 487, 'epoch': 1} {'type': 'loss', 'content': 0.03618357703089714, 'timestamp': '2025-09-10 02:16:56.947737', 'step': 488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:56.980013', 'step': 488, 'epoch': 1} {'type': 'loss', 'content': 0.02346952259540558, 'timestamp': '2025-09-10 02:16:56.985354', 'step': 489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:16:57.031808', 'step': 489, 'epoch': 1} {'type': 'loss', 'content': 0.02613210491836071, 'timestamp': '2025-09-10 02:16:57.045157', 'step': 490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:57.074931', 'step': 490, 'epoch': 1} {'type': 'loss', 'content': 0.013658554293215275, 'timestamp': '2025-09-10 02:16:57.079421', 'step': 491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:16:57.113555', 'step': 491, 'epoch': 1} {'type': 'loss', 'content': 0.023100513964891434, 'timestamp': '2025-09-10 02:16:57.148219', 'step': 492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:16:57.179933', 'step': 492, 'epoch': 1} {'type': 'loss', 'content': 0.010461096651852131, 'timestamp': '2025-09-10 02:16:57.185393', 'step': 493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:16:57.216369', 'step': 493, 'epoch': 1} {'type': 'loss', 'content': 0.01992633379995823, 'timestamp': '2025-09-10 02:16:57.220455', 'step': 494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:16:57.251240', 'step': 494, 'epoch': 1} {'type': 'loss', 'content': 0.008644700050354004, 'timestamp': '2025-09-10 02:16:57.257576', 'step': 495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:16:57.287936', 'step': 495, 'epoch': 1} {'type': 'loss', 'content': 0.018692122772336006, 'timestamp': '2025-09-10 02:16:57.313229', 'step': 496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:16:57.343998', 'step': 496, 'epoch': 1} {'type': 'loss', 'content': 0.010719933547079563, 'timestamp': '2025-09-10 02:16:57.346013', 'step': 497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:16:57.385187', 'step': 497, 'epoch': 1} {'type': 'loss', 'content': 0.014836416579782963, 'timestamp': '2025-09-10 02:16:57.401132', 'step': 498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:16:57.439967', 'step': 498, 'epoch': 1} {'type': 'loss', 'content': 0.009858435951173306, 'timestamp': '2025-09-10 02:16:57.444529', 'step': 499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:16:57.484973', 'step': 499, 'epoch': 1} {'type': 'loss', 'content': 0.014942965470254421, 'timestamp': '2025-09-10 02:16:57.510192', 'step': 500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 500', 'timestamp': '2025-09-10 02:17:02.717209', 'step': 500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:02.750251', 'step': 500, 'epoch': 1} {'type': 'loss', 'content': 0.010459995828568935, 'timestamp': '2025-09-10 02:17:02.754425', 'step': 501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:02.786235', 'step': 501, 'epoch': 1} {'type': 'loss', 'content': 0.00723966583609581, 'timestamp': '2025-09-10 02:17:02.795255', 'step': 502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:02.832596', 'step': 502, 'epoch': 1} {'type': 'loss', 'content': 0.023343030363321304, 'timestamp': '2025-09-10 02:17:02.839413', 'step': 503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:02.871628', 'step': 503, 'epoch': 1} {'type': 'loss', 'content': 0.01857823319733143, 'timestamp': '2025-09-10 02:17:02.902391', 'step': 504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:17:02.935396', 'step': 504, 'epoch': 1} {'type': 'loss', 'content': 0.017975622788071632, 'timestamp': '2025-09-10 02:17:02.948532', 'step': 505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:02.994342', 'step': 505, 'epoch': 1} {'type': 'loss', 'content': 0.029632670804858208, 'timestamp': '2025-09-10 02:17:03.002054', 'step': 506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:03.042887', 'step': 506, 'epoch': 1} {'type': 'loss', 'content': 0.030040541663765907, 'timestamp': '2025-09-10 02:17:03.050694', 'step': 507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:03.081641', 'step': 507, 'epoch': 1} {'type': 'loss', 'content': 0.010655703954398632, 'timestamp': '2025-09-10 02:17:03.110342', 'step': 508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:03.141107', 'step': 508, 'epoch': 1} {'type': 'loss', 'content': 0.010932376608252525, 'timestamp': '2025-09-10 02:17:03.150935', 'step': 509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:03.189097', 'step': 509, 'epoch': 1} {'type': 'loss', 'content': 0.016881374642252922, 'timestamp': '2025-09-10 02:17:03.201651', 'step': 510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:17:03.238792', 'step': 510, 'epoch': 1} {'type': 'loss', 'content': 0.011083531193435192, 'timestamp': '2025-09-10 02:17:03.252584', 'step': 511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:03.288534', 'step': 511, 'epoch': 1} {'type': 'loss', 'content': 0.023147176951169968, 'timestamp': '2025-09-10 02:17:03.319590', 'step': 512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:03.365430', 'step': 512, 'epoch': 1} {'type': 'loss', 'content': 0.03139190003275871, 'timestamp': '2025-09-10 02:17:03.370617', 'step': 513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:17:03.420989', 'step': 513, 'epoch': 1} {'type': 'loss', 'content': 0.02452153153717518, 'timestamp': '2025-09-10 02:17:03.436882', 'step': 514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:03.469459', 'step': 514, 'epoch': 1} {'type': 'loss', 'content': 0.012951391749083996, 'timestamp': '2025-09-10 02:17:03.476526', 'step': 515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:17:03.523716', 'step': 515, 'epoch': 1} {'type': 'loss', 'content': 0.014777913689613342, 'timestamp': '2025-09-10 02:17:03.558303', 'step': 516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:17:03.591745', 'step': 516, 'epoch': 1} {'type': 'loss', 'content': 0.03574973717331886, 'timestamp': '2025-09-10 02:17:03.604408', 'step': 517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:03.634689', 'step': 517, 'epoch': 1} {'type': 'loss', 'content': 0.009790212847292423, 'timestamp': '2025-09-10 02:17:03.641426', 'step': 518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:03.676660', 'step': 518, 'epoch': 1} {'type': 'loss', 'content': 0.02090480551123619, 'timestamp': '2025-09-10 02:17:03.688328', 'step': 519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:03.725888', 'step': 519, 'epoch': 1} {'type': 'loss', 'content': 0.0349605493247509, 'timestamp': '2025-09-10 02:17:03.750814', 'step': 520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:03.784064', 'step': 520, 'epoch': 1} {'type': 'loss', 'content': 0.010910294018685818, 'timestamp': '2025-09-10 02:17:03.789203', 'step': 521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:03.821200', 'step': 521, 'epoch': 1} {'type': 'loss', 'content': 0.013309179805219173, 'timestamp': '2025-09-10 02:17:03.825439', 'step': 522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:03.856521', 'step': 522, 'epoch': 1} {'type': 'loss', 'content': 0.028638780117034912, 'timestamp': '2025-09-10 02:17:03.868386', 'step': 523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:03.900181', 'step': 523, 'epoch': 1} {'type': 'loss', 'content': 0.009006711654365063, 'timestamp': '2025-09-10 02:17:03.928643', 'step': 524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:03.958522', 'step': 524, 'epoch': 1} {'type': 'loss', 'content': 0.02321997843682766, 'timestamp': '2025-09-10 02:17:03.963456', 'step': 525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:03.995505', 'step': 525, 'epoch': 1} {'type': 'loss', 'content': 0.02315063215792179, 'timestamp': '2025-09-10 02:17:04.007499', 'step': 526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:04.053767', 'step': 526, 'epoch': 1} {'type': 'loss', 'content': 0.015552300028502941, 'timestamp': '2025-09-10 02:17:04.060843', 'step': 527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:04.092583', 'step': 527, 'epoch': 1} {'type': 'loss', 'content': 0.029168089851737022, 'timestamp': '2025-09-10 02:17:04.123678', 'step': 528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:04.157849', 'step': 528, 'epoch': 1} {'type': 'loss', 'content': 0.003137963591143489, 'timestamp': '2025-09-10 02:17:04.163087', 'step': 529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:04.196961', 'step': 529, 'epoch': 1} {'type': 'loss', 'content': 0.03553525730967522, 'timestamp': '2025-09-10 02:17:04.204759', 'step': 530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:04.239239', 'step': 530, 'epoch': 1} {'type': 'loss', 'content': 0.022633006796240807, 'timestamp': '2025-09-10 02:17:04.251415', 'step': 531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:04.282407', 'step': 531, 'epoch': 1} {'type': 'loss', 'content': 0.016931835561990738, 'timestamp': '2025-09-10 02:17:04.311018', 'step': 532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:17:04.351459', 'step': 532, 'epoch': 1} {'type': 'loss', 'content': 0.010154195129871368, 'timestamp': '2025-09-10 02:17:04.368471', 'step': 533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:04.402207', 'step': 533, 'epoch': 1} {'type': 'loss', 'content': 0.035087209194898605, 'timestamp': '2025-09-10 02:17:04.409146', 'step': 534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:04.439763', 'step': 534, 'epoch': 1} {'type': 'loss', 'content': 0.013602891936898232, 'timestamp': '2025-09-10 02:17:04.444187', 'step': 535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 688], 'flops': 20408222954560}, 'timestamp': '2025-09-10 02:17:04.512971', 'step': 535, 'epoch': 1} {'type': 'loss', 'content': 0.010147054679691792, 'timestamp': '2025-09-10 02:17:04.557938', 'step': 536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:04.588485', 'step': 536, 'epoch': 1} {'type': 'loss', 'content': 0.009458757936954498, 'timestamp': '2025-09-10 02:17:04.598718', 'step': 537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:04.632882', 'step': 537, 'epoch': 1} {'type': 'loss', 'content': 0.01887761428952217, 'timestamp': '2025-09-10 02:17:04.640519', 'step': 538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:04.672166', 'step': 538, 'epoch': 1} {'type': 'loss', 'content': 0.013571500778198242, 'timestamp': '2025-09-10 02:17:04.679851', 'step': 539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:04.710094', 'step': 539, 'epoch': 1} {'type': 'loss', 'content': 0.018864035606384277, 'timestamp': '2025-09-10 02:17:04.734715', 'step': 540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:04.766172', 'step': 540, 'epoch': 1} {'type': 'loss', 'content': 0.014328965917229652, 'timestamp': '2025-09-10 02:17:04.775922', 'step': 541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:04.807076', 'step': 541, 'epoch': 1} {'type': 'loss', 'content': 0.007837352342903614, 'timestamp': '2025-09-10 02:17:04.814804', 'step': 542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:04.848106', 'step': 542, 'epoch': 1} {'type': 'loss', 'content': 0.020064374431967735, 'timestamp': '2025-09-10 02:17:04.855243', 'step': 543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:17:04.895354', 'step': 543, 'epoch': 1} {'type': 'loss', 'content': 0.027344727888703346, 'timestamp': '2025-09-10 02:17:04.931857', 'step': 544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:04.962318', 'step': 544, 'epoch': 1} {'type': 'loss', 'content': 0.035422343760728836, 'timestamp': '2025-09-10 02:17:04.969466', 'step': 545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 560], 'flops': 16611393146432}, 'timestamp': '2025-09-10 02:17:05.026681', 'step': 545, 'epoch': 1} {'type': 'loss', 'content': 0.009574404917657375, 'timestamp': '2025-09-10 02:17:05.046057', 'step': 546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:05.083897', 'step': 546, 'epoch': 1} {'type': 'loss', 'content': 0.012771585024893284, 'timestamp': '2025-09-10 02:17:05.090641', 'step': 547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:05.123234', 'step': 547, 'epoch': 1} {'type': 'loss', 'content': 0.007596482522785664, 'timestamp': '2025-09-10 02:17:05.154316', 'step': 548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:05.185184', 'step': 548, 'epoch': 1} {'type': 'loss', 'content': 0.029550552368164062, 'timestamp': '2025-09-10 02:17:05.189804', 'step': 549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:05.221572', 'step': 549, 'epoch': 1} {'type': 'loss', 'content': 0.005684220232069492, 'timestamp': '2025-09-10 02:17:05.226030', 'step': 550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:05.259344', 'step': 550, 'epoch': 1} {'type': 'loss', 'content': 0.020296234637498856, 'timestamp': '2025-09-10 02:17:05.271859', 'step': 551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:05.306189', 'step': 551, 'epoch': 1} {'type': 'loss', 'content': 0.011387856677174568, 'timestamp': '2025-09-10 02:17:05.339356', 'step': 552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:05.370234', 'step': 552, 'epoch': 1} {'type': 'loss', 'content': 0.005133692175149918, 'timestamp': '2025-09-10 02:17:05.374915', 'step': 553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:05.405630', 'step': 553, 'epoch': 1} {'type': 'loss', 'content': 0.015191650949418545, 'timestamp': '2025-09-10 02:17:05.417754', 'step': 554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:05.449245', 'step': 554, 'epoch': 1} {'type': 'loss', 'content': 0.01479465514421463, 'timestamp': '2025-09-10 02:17:05.456377', 'step': 555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:05.492055', 'step': 555, 'epoch': 1} {'type': 'loss', 'content': 0.00847632810473442, 'timestamp': '2025-09-10 02:17:05.517533', 'step': 556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:05.558071', 'step': 556, 'epoch': 1} {'type': 'loss', 'content': 0.020174086093902588, 'timestamp': '2025-09-10 02:17:05.560370', 'step': 557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:17:05.603236', 'step': 557, 'epoch': 1} {'type': 'loss', 'content': 0.008243663236498833, 'timestamp': '2025-09-10 02:17:05.617199', 'step': 558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:05.650878', 'step': 558, 'epoch': 1} {'type': 'loss', 'content': 0.014073808677494526, 'timestamp': '2025-09-10 02:17:05.655400', 'step': 559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:05.685836', 'step': 559, 'epoch': 1} {'type': 'loss', 'content': 0.012161211110651493, 'timestamp': '2025-09-10 02:17:05.711317', 'step': 560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:05.744726', 'step': 560, 'epoch': 1} {'type': 'loss', 'content': 0.0253736712038517, 'timestamp': '2025-09-10 02:17:05.753532', 'step': 561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:05.787108', 'step': 561, 'epoch': 1} {'type': 'loss', 'content': 0.00579653587192297, 'timestamp': '2025-09-10 02:17:05.799398', 'step': 562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:17:05.844748', 'step': 562, 'epoch': 1} {'type': 'loss', 'content': 0.0027781727258116007, 'timestamp': '2025-09-10 02:17:05.858553', 'step': 563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:05.894663', 'step': 563, 'epoch': 1} {'type': 'loss', 'content': 0.035328906029462814, 'timestamp': '2025-09-10 02:17:05.924916', 'step': 564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:05.962930', 'step': 564, 'epoch': 1} {'type': 'loss', 'content': 0.00891299732029438, 'timestamp': '2025-09-10 02:17:05.965304', 'step': 565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:05.998336', 'step': 565, 'epoch': 1} {'type': 'loss', 'content': 0.013710664585232735, 'timestamp': '2025-09-10 02:17:06.008845', 'step': 566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:06.039916', 'step': 566, 'epoch': 1} {'type': 'loss', 'content': 0.00687979394569993, 'timestamp': '2025-09-10 02:17:06.046706', 'step': 567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:06.077841', 'step': 567, 'epoch': 1} {'type': 'loss', 'content': 0.009851823560893536, 'timestamp': '2025-09-10 02:17:06.106375', 'step': 568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:06.137339', 'step': 568, 'epoch': 1} {'type': 'loss', 'content': 0.012755611911416054, 'timestamp': '2025-09-10 02:17:06.143314', 'step': 569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:06.174764', 'step': 569, 'epoch': 1} {'type': 'loss', 'content': 0.005232820753008127, 'timestamp': '2025-09-10 02:17:06.185075', 'step': 570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:06.216416', 'step': 570, 'epoch': 1} {'type': 'loss', 'content': 0.025471851229667664, 'timestamp': '2025-09-10 02:17:06.220670', 'step': 571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:06.261098', 'step': 571, 'epoch': 1} {'type': 'loss', 'content': 0.016110900789499283, 'timestamp': '2025-09-10 02:17:06.289501', 'step': 572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:17:06.336757', 'step': 572, 'epoch': 1} {'type': 'loss', 'content': 0.018473317846655846, 'timestamp': '2025-09-10 02:17:06.353491', 'step': 573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:06.385507', 'step': 573, 'epoch': 1} {'type': 'loss', 'content': 0.014140649698674679, 'timestamp': '2025-09-10 02:17:06.398092', 'step': 574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:06.429189', 'step': 574, 'epoch': 1} {'type': 'loss', 'content': 0.011097794398665428, 'timestamp': '2025-09-10 02:17:06.435977', 'step': 575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:06.466194', 'step': 575, 'epoch': 1} {'type': 'loss', 'content': 0.01416011806577444, 'timestamp': '2025-09-10 02:17:06.493932', 'step': 576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:06.545475', 'step': 576, 'epoch': 1} {'type': 'loss', 'content': 0.014164241962134838, 'timestamp': '2025-09-10 02:17:06.553210', 'step': 577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:17:06.591569', 'step': 577, 'epoch': 1} {'type': 'loss', 'content': 0.04556810483336449, 'timestamp': '2025-09-10 02:17:06.604998', 'step': 578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:06.636536', 'step': 578, 'epoch': 1} {'type': 'loss', 'content': 0.010820058174431324, 'timestamp': '2025-09-10 02:17:06.639940', 'step': 579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:06.673852', 'step': 579, 'epoch': 1} {'type': 'loss', 'content': 0.0034028550144284964, 'timestamp': '2025-09-10 02:17:06.704601', 'step': 580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:06.736412', 'step': 580, 'epoch': 1} {'type': 'loss', 'content': 0.016602005809545517, 'timestamp': '2025-09-10 02:17:06.741634', 'step': 581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:06.771605', 'step': 581, 'epoch': 1} {'type': 'loss', 'content': 0.05845152586698532, 'timestamp': '2025-09-10 02:17:06.776491', 'step': 582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:06.808863', 'step': 582, 'epoch': 1} {'type': 'loss', 'content': 0.03493461757898331, 'timestamp': '2025-09-10 02:17:06.814928', 'step': 583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:06.846547', 'step': 583, 'epoch': 1} {'type': 'loss', 'content': 0.021650012582540512, 'timestamp': '2025-09-10 02:17:06.879990', 'step': 584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:06.913612', 'step': 584, 'epoch': 1} {'type': 'loss', 'content': 0.0019279540283605456, 'timestamp': '2025-09-10 02:17:06.918165', 'step': 585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:06.949617', 'step': 585, 'epoch': 1} {'type': 'loss', 'content': 0.01664150133728981, 'timestamp': '2025-09-10 02:17:06.960220', 'step': 586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:06.992181', 'step': 586, 'epoch': 1} {'type': 'loss', 'content': 0.02184317074716091, 'timestamp': '2025-09-10 02:17:06.998923', 'step': 587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:07.043515', 'step': 587, 'epoch': 1} {'type': 'loss', 'content': 0.004326535388827324, 'timestamp': '2025-09-10 02:17:07.071694', 'step': 588, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:17:17.297865', 'step': 588, 'epoch': 1} {'type': 'pplx', 'content': 17933771.412629146, 'timestamp': '2025-09-10 02:17:17.300864', 'step': 588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 816], 'flops': 24205052762688}, 'timestamp': '2025-09-10 02:17:17.369599', 'step': 588, 'epoch': 1} {'type': 'loss', 'content': 0.009856624528765678, 'timestamp': '2025-09-10 02:17:17.397874', 'step': 589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:17.433590', 'step': 589, 'epoch': 1} {'type': 'loss', 'content': 0.017127353698015213, 'timestamp': '2025-09-10 02:17:17.440647', 'step': 590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:17.472940', 'step': 590, 'epoch': 1} {'type': 'loss', 'content': 0.011390717700123787, 'timestamp': '2025-09-10 02:17:17.482793', 'step': 591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:17.514056', 'step': 591, 'epoch': 1} {'type': 'loss', 'content': 0.014447472058236599, 'timestamp': '2025-09-10 02:17:17.546534', 'step': 592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:17.579394', 'step': 592, 'epoch': 1} {'type': 'loss', 'content': 0.003005419624969363, 'timestamp': '2025-09-10 02:17:17.583121', 'step': 593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:17.615124', 'step': 593, 'epoch': 1} {'type': 'loss', 'content': 0.00944295059889555, 'timestamp': '2025-09-10 02:17:17.622720', 'step': 594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:17.660679', 'step': 594, 'epoch': 1} {'type': 'loss', 'content': 0.009788398630917072, 'timestamp': '2025-09-10 02:17:17.665069', 'step': 595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:17:17.701220', 'step': 595, 'epoch': 1} {'type': 'loss', 'content': 0.012333549559116364, 'timestamp': '2025-09-10 02:17:17.736145', 'step': 596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:17.769079', 'step': 596, 'epoch': 1} {'type': 'loss', 'content': 0.01467831339687109, 'timestamp': '2025-09-10 02:17:17.773289', 'step': 597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:17.807430', 'step': 597, 'epoch': 1} {'type': 'loss', 'content': 0.03252703696489334, 'timestamp': '2025-09-10 02:17:17.814593', 'step': 598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:17.849955', 'step': 598, 'epoch': 1} {'type': 'loss', 'content': 0.012095707468688488, 'timestamp': '2025-09-10 02:17:17.857669', 'step': 599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:17.890183', 'step': 599, 'epoch': 1} {'type': 'loss', 'content': 0.03608888015151024, 'timestamp': '2025-09-10 02:17:17.918339', 'step': 600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:17.951914', 'step': 600, 'epoch': 1} {'type': 'loss', 'content': 0.0019412686815485358, 'timestamp': '2025-09-10 02:17:17.956404', 'step': 601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:17.986749', 'step': 601, 'epoch': 1} {'type': 'loss', 'content': 0.015688760206103325, 'timestamp': '2025-09-10 02:17:17.993894', 'step': 602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:18.026557', 'step': 602, 'epoch': 1} {'type': 'loss', 'content': 0.013548861257731915, 'timestamp': '2025-09-10 02:17:18.038440', 'step': 603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:18.070533', 'step': 603, 'epoch': 1} {'type': 'loss', 'content': 0.045547544956207275, 'timestamp': '2025-09-10 02:17:18.103997', 'step': 604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:17:18.147664', 'step': 604, 'epoch': 1} {'type': 'loss', 'content': 0.029206562787294388, 'timestamp': '2025-09-10 02:17:18.160822', 'step': 605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:18.200358', 'step': 605, 'epoch': 1} {'type': 'loss', 'content': 0.0025844343472272158, 'timestamp': '2025-09-10 02:17:18.207248', 'step': 606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:18.239358', 'step': 606, 'epoch': 1} {'type': 'loss', 'content': 0.008630426600575447, 'timestamp': '2025-09-10 02:17:18.249893', 'step': 607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:18.283006', 'step': 607, 'epoch': 1} {'type': 'loss', 'content': 0.0020521271508187056, 'timestamp': '2025-09-10 02:17:18.308137', 'step': 608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:18.341145', 'step': 608, 'epoch': 1} {'type': 'loss', 'content': 0.0035357114393264055, 'timestamp': '2025-09-10 02:17:18.345327', 'step': 609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:18.377034', 'step': 609, 'epoch': 1} {'type': 'loss', 'content': 0.006719955708831549, 'timestamp': '2025-09-10 02:17:18.384473', 'step': 610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:18.417171', 'step': 610, 'epoch': 1} {'type': 'loss', 'content': 0.004843573085963726, 'timestamp': '2025-09-10 02:17:18.421468', 'step': 611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:18.454741', 'step': 611, 'epoch': 1} {'type': 'loss', 'content': 0.023189399391412735, 'timestamp': '2025-09-10 02:17:18.485520', 'step': 612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:17:18.521681', 'step': 612, 'epoch': 1} {'type': 'loss', 'content': 0.005349505692720413, 'timestamp': '2025-09-10 02:17:18.534378', 'step': 613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:18.566683', 'step': 613, 'epoch': 1} {'type': 'loss', 'content': 0.0026773291174322367, 'timestamp': '2025-09-10 02:17:18.578531', 'step': 614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:18.610775', 'step': 614, 'epoch': 1} {'type': 'loss', 'content': 0.02543543465435505, 'timestamp': '2025-09-10 02:17:18.614897', 'step': 615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:17:18.650542', 'step': 615, 'epoch': 1} {'type': 'loss', 'content': 0.006155446171760559, 'timestamp': '2025-09-10 02:17:18.685120', 'step': 616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:18.718426', 'step': 616, 'epoch': 1} {'type': 'loss', 'content': 0.012057982385158539, 'timestamp': '2025-09-10 02:17:18.723346', 'step': 617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:18.757126', 'step': 617, 'epoch': 1} {'type': 'loss', 'content': 0.0031552365981042385, 'timestamp': '2025-09-10 02:17:18.764412', 'step': 618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:18.796501', 'step': 618, 'epoch': 1} {'type': 'loss', 'content': 0.024341052398085594, 'timestamp': '2025-09-10 02:17:18.804106', 'step': 619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:18.839158', 'step': 619, 'epoch': 1} {'type': 'loss', 'content': 0.0073067969642579556, 'timestamp': '2025-09-10 02:17:18.869850', 'step': 620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:18.902496', 'step': 620, 'epoch': 1} {'type': 'loss', 'content': 0.018821122124791145, 'timestamp': '2025-09-10 02:17:18.910508', 'step': 621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:18.943708', 'step': 621, 'epoch': 1} {'type': 'loss', 'content': 0.0015981352189555764, 'timestamp': '2025-09-10 02:17:18.947909', 'step': 622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:18.981778', 'step': 622, 'epoch': 1} {'type': 'loss', 'content': 0.010449434630572796, 'timestamp': '2025-09-10 02:17:18.989279', 'step': 623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:17:19.021145', 'step': 623, 'epoch': 1} {'type': 'loss', 'content': 0.006673470605164766, 'timestamp': '2025-09-10 02:17:19.046217', 'step': 624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:19.077682', 'step': 624, 'epoch': 1} {'type': 'loss', 'content': 0.027125883847475052, 'timestamp': '2025-09-10 02:17:19.081932', 'step': 625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:19.111910', 'step': 625, 'epoch': 1} {'type': 'loss', 'content': 0.00828898511826992, 'timestamp': '2025-09-10 02:17:19.115841', 'step': 626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:19.148853', 'step': 626, 'epoch': 1} {'type': 'loss', 'content': 0.0038712245877832174, 'timestamp': '2025-09-10 02:17:19.156549', 'step': 627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:19.188531', 'step': 627, 'epoch': 1} {'type': 'loss', 'content': 0.012664406560361385, 'timestamp': '2025-09-10 02:17:19.219467', 'step': 628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:19.251808', 'step': 628, 'epoch': 1} {'type': 'loss', 'content': 0.03356235474348068, 'timestamp': '2025-09-10 02:17:19.256337', 'step': 629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:19.288914', 'step': 629, 'epoch': 1} {'type': 'loss', 'content': 0.027829742059111595, 'timestamp': '2025-09-10 02:17:19.300893', 'step': 630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:19.334244', 'step': 630, 'epoch': 1} {'type': 'loss', 'content': 0.009094549342989922, 'timestamp': '2025-09-10 02:17:19.341336', 'step': 631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:19.373190', 'step': 631, 'epoch': 1} {'type': 'loss', 'content': 0.0018705466063693166, 'timestamp': '2025-09-10 02:17:19.401577', 'step': 632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:19.433749', 'step': 632, 'epoch': 1} {'type': 'loss', 'content': 0.017098234966397285, 'timestamp': '2025-09-10 02:17:19.438572', 'step': 633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:19.472952', 'step': 633, 'epoch': 1} {'type': 'loss', 'content': 0.052354682236909866, 'timestamp': '2025-09-10 02:17:19.480464', 'step': 634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:19.513654', 'step': 634, 'epoch': 1} {'type': 'loss', 'content': 0.055087942630052567, 'timestamp': '2025-09-10 02:17:19.520295', 'step': 635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:19.553748', 'step': 635, 'epoch': 1} {'type': 'loss', 'content': 0.005113348830491304, 'timestamp': '2025-09-10 02:17:19.586415', 'step': 636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:19.618032', 'step': 636, 'epoch': 1} {'type': 'loss', 'content': 0.05106746777892113, 'timestamp': '2025-09-10 02:17:19.622994', 'step': 637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:19.660180', 'step': 637, 'epoch': 1} {'type': 'loss', 'content': 0.0263382438570261, 'timestamp': '2025-09-10 02:17:19.662427', 'step': 638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:19.693860', 'step': 638, 'epoch': 1} {'type': 'loss', 'content': 0.007414556574076414, 'timestamp': '2025-09-10 02:17:19.700726', 'step': 639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:19.733055', 'step': 639, 'epoch': 1} {'type': 'loss', 'content': 0.001413815887644887, 'timestamp': '2025-09-10 02:17:19.766419', 'step': 640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:19.798671', 'step': 640, 'epoch': 1} {'type': 'loss', 'content': 0.021783774718642235, 'timestamp': '2025-09-10 02:17:19.808756', 'step': 641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:19.840085', 'step': 641, 'epoch': 1} {'type': 'loss', 'content': 0.027584636583924294, 'timestamp': '2025-09-10 02:17:19.846841', 'step': 642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:19.879816', 'step': 642, 'epoch': 1} {'type': 'loss', 'content': 0.03216005116701126, 'timestamp': '2025-09-10 02:17:19.887161', 'step': 643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:19.919658', 'step': 643, 'epoch': 1} {'type': 'loss', 'content': 0.036754488945007324, 'timestamp': '2025-09-10 02:17:19.951670', 'step': 644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:19.984226', 'step': 644, 'epoch': 1} {'type': 'loss', 'content': 0.009167312644422054, 'timestamp': '2025-09-10 02:17:19.986908', 'step': 645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:20.020646', 'step': 645, 'epoch': 1} {'type': 'loss', 'content': 0.015368753112852573, 'timestamp': '2025-09-10 02:17:20.027113', 'step': 646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:20.067572', 'step': 646, 'epoch': 1} {'type': 'loss', 'content': 0.025697126984596252, 'timestamp': '2025-09-10 02:17:20.071649', 'step': 647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:20.104098', 'step': 647, 'epoch': 1} {'type': 'loss', 'content': 0.0037650710437446833, 'timestamp': '2025-09-10 02:17:20.132598', 'step': 648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:20.164187', 'step': 648, 'epoch': 1} {'type': 'loss', 'content': 0.010932421311736107, 'timestamp': '2025-09-10 02:17:20.168755', 'step': 649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:17:20.206320', 'step': 649, 'epoch': 1} {'type': 'loss', 'content': 0.022069621831178665, 'timestamp': '2025-09-10 02:17:20.221952', 'step': 650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:20.253942', 'step': 650, 'epoch': 1} {'type': 'loss', 'content': 0.0029460687655955553, 'timestamp': '2025-09-10 02:17:20.261051', 'step': 651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:20.291356', 'step': 651, 'epoch': 1} {'type': 'loss', 'content': 0.01202553603798151, 'timestamp': '2025-09-10 02:17:20.319792', 'step': 652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:17:20.357378', 'step': 652, 'epoch': 1} {'type': 'loss', 'content': 0.0178877804428339, 'timestamp': '2025-09-10 02:17:20.370475', 'step': 653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:20.400728', 'step': 653, 'epoch': 1} {'type': 'loss', 'content': 0.02879807911813259, 'timestamp': '2025-09-10 02:17:20.407758', 'step': 654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:20.439357', 'step': 654, 'epoch': 1} {'type': 'loss', 'content': 0.014536075294017792, 'timestamp': '2025-09-10 02:17:20.449146', 'step': 655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:20.479745', 'step': 655, 'epoch': 1} {'type': 'loss', 'content': 0.005087182391434908, 'timestamp': '2025-09-10 02:17:20.512393', 'step': 656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:20.542746', 'step': 656, 'epoch': 1} {'type': 'loss', 'content': 0.021268010139465332, 'timestamp': '2025-09-10 02:17:20.552947', 'step': 657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:20.584319', 'step': 657, 'epoch': 1} {'type': 'loss', 'content': 0.03790181875228882, 'timestamp': '2025-09-10 02:17:20.591566', 'step': 658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:20.623381', 'step': 658, 'epoch': 1} {'type': 'loss', 'content': 0.01340021938085556, 'timestamp': '2025-09-10 02:17:20.630847', 'step': 659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:20.660721', 'step': 659, 'epoch': 1} {'type': 'loss', 'content': 0.04001796990633011, 'timestamp': '2025-09-10 02:17:20.689358', 'step': 660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:20.720306', 'step': 660, 'epoch': 1} {'type': 'loss', 'content': 0.014666594564914703, 'timestamp': '2025-09-10 02:17:20.724796', 'step': 661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:20.755743', 'step': 661, 'epoch': 1} {'type': 'loss', 'content': 0.015082466416060925, 'timestamp': '2025-09-10 02:17:20.763048', 'step': 662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:20.793259', 'step': 662, 'epoch': 1} {'type': 'loss', 'content': 0.02892708219587803, 'timestamp': '2025-09-10 02:17:20.800964', 'step': 663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:20.831799', 'step': 663, 'epoch': 1} {'type': 'loss', 'content': 0.015628747642040253, 'timestamp': '2025-09-10 02:17:20.864736', 'step': 664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:20.896146', 'step': 664, 'epoch': 1} {'type': 'loss', 'content': 0.00712405052036047, 'timestamp': '2025-09-10 02:17:20.900645', 'step': 665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:20.930963', 'step': 665, 'epoch': 1} {'type': 'loss', 'content': 0.026593917980790138, 'timestamp': '2025-09-10 02:17:20.937907', 'step': 666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:20.968190', 'step': 666, 'epoch': 1} {'type': 'loss', 'content': 0.024430980905890465, 'timestamp': '2025-09-10 02:17:20.979065', 'step': 667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:21.009583', 'step': 667, 'epoch': 1} {'type': 'loss', 'content': 0.00685026403516531, 'timestamp': '2025-09-10 02:17:21.034329', 'step': 668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:21.064108', 'step': 668, 'epoch': 1} {'type': 'loss', 'content': 0.017591923475265503, 'timestamp': '2025-09-10 02:17:21.068756', 'step': 669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:21.100801', 'step': 669, 'epoch': 1} {'type': 'loss', 'content': 0.037963759154081345, 'timestamp': '2025-09-10 02:17:21.108507', 'step': 670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:21.139238', 'step': 670, 'epoch': 1} {'type': 'loss', 'content': 0.010039789602160454, 'timestamp': '2025-09-10 02:17:21.146744', 'step': 671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:21.177482', 'step': 671, 'epoch': 1} {'type': 'loss', 'content': 0.009369760751724243, 'timestamp': '2025-09-10 02:17:21.208546', 'step': 672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:21.239684', 'step': 672, 'epoch': 1} {'type': 'loss', 'content': 0.021921101957559586, 'timestamp': '2025-09-10 02:17:21.244528', 'step': 673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:17:21.283405', 'step': 673, 'epoch': 1} {'type': 'loss', 'content': 0.031845226883888245, 'timestamp': '2025-09-10 02:17:21.299593', 'step': 674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:21.330395', 'step': 674, 'epoch': 1} {'type': 'loss', 'content': 0.018058914691209793, 'timestamp': '2025-09-10 02:17:21.337228', 'step': 675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:21.367685', 'step': 675, 'epoch': 1} {'type': 'loss', 'content': 0.014186178334057331, 'timestamp': '2025-09-10 02:17:21.392579', 'step': 676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:21.422596', 'step': 676, 'epoch': 1} {'type': 'loss', 'content': 0.009794117882847786, 'timestamp': '2025-09-10 02:17:21.427139', 'step': 677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:21.461444', 'step': 677, 'epoch': 1} {'type': 'loss', 'content': 0.02925429679453373, 'timestamp': '2025-09-10 02:17:21.468343', 'step': 678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:21.498941', 'step': 678, 'epoch': 1} {'type': 'loss', 'content': 0.006639067083597183, 'timestamp': '2025-09-10 02:17:21.506197', 'step': 679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:17:21.539972', 'step': 679, 'epoch': 1} {'type': 'loss', 'content': 0.012162303552031517, 'timestamp': '2025-09-10 02:17:21.574487', 'step': 680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:21.606403', 'step': 680, 'epoch': 1} {'type': 'loss', 'content': 0.01577383652329445, 'timestamp': '2025-09-10 02:17:21.608542', 'step': 681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:21.638652', 'step': 681, 'epoch': 1} {'type': 'loss', 'content': 0.01001597661525011, 'timestamp': '2025-09-10 02:17:21.645573', 'step': 682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:21.676435', 'step': 682, 'epoch': 1} {'type': 'loss', 'content': 0.027138683944940567, 'timestamp': '2025-09-10 02:17:21.684229', 'step': 683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:21.714391', 'step': 683, 'epoch': 1} {'type': 'loss', 'content': 0.01829609088599682, 'timestamp': '2025-09-10 02:17:21.742189', 'step': 684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:17:21.778582', 'step': 684, 'epoch': 1} {'type': 'loss', 'content': 0.020318562164902687, 'timestamp': '2025-09-10 02:17:21.793784', 'step': 685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:21.824562', 'step': 685, 'epoch': 1} {'type': 'loss', 'content': 0.025962335988879204, 'timestamp': '2025-09-10 02:17:21.832278', 'step': 686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:21.864754', 'step': 686, 'epoch': 1} {'type': 'loss', 'content': 0.022130966186523438, 'timestamp': '2025-09-10 02:17:21.871795', 'step': 687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:21.908649', 'step': 687, 'epoch': 1} {'type': 'loss', 'content': 0.020423393696546555, 'timestamp': '2025-09-10 02:17:21.936467', 'step': 688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:21.969398', 'step': 688, 'epoch': 1} {'type': 'loss', 'content': 0.025996601209044456, 'timestamp': '2025-09-10 02:17:21.974116', 'step': 689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:22.008827', 'step': 689, 'epoch': 1} {'type': 'loss', 'content': 0.013769307173788548, 'timestamp': '2025-09-10 02:17:22.021168', 'step': 690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:22.051835', 'step': 690, 'epoch': 1} {'type': 'loss', 'content': 0.01184395607560873, 'timestamp': '2025-09-10 02:17:22.058527', 'step': 691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:22.089604', 'step': 691, 'epoch': 1} {'type': 'loss', 'content': 0.013138137757778168, 'timestamp': '2025-09-10 02:17:22.117290', 'step': 692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:22.148020', 'step': 692, 'epoch': 1} {'type': 'loss', 'content': 0.02101938985288143, 'timestamp': '2025-09-10 02:17:22.152625', 'step': 693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:22.182555', 'step': 693, 'epoch': 1} {'type': 'loss', 'content': 0.008636675775051117, 'timestamp': '2025-09-10 02:17:22.190279', 'step': 694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:22.220932', 'step': 694, 'epoch': 1} {'type': 'loss', 'content': 0.014400548301637173, 'timestamp': '2025-09-10 02:17:22.228369', 'step': 695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:17:22.266988', 'step': 695, 'epoch': 1} {'type': 'loss', 'content': 0.02432694099843502, 'timestamp': '2025-09-10 02:17:22.304035', 'step': 696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:22.334547', 'step': 696, 'epoch': 1} {'type': 'loss', 'content': 0.00980927050113678, 'timestamp': '2025-09-10 02:17:22.342975', 'step': 697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:22.373782', 'step': 697, 'epoch': 1} {'type': 'loss', 'content': 0.011573218740522861, 'timestamp': '2025-09-10 02:17:22.381161', 'step': 698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:22.411688', 'step': 698, 'epoch': 1} {'type': 'loss', 'content': 0.02407762221992016, 'timestamp': '2025-09-10 02:17:22.419006', 'step': 699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:22.448918', 'step': 699, 'epoch': 1} {'type': 'loss', 'content': 0.02633582428097725, 'timestamp': '2025-09-10 02:17:22.476479', 'step': 700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:22.506687', 'step': 700, 'epoch': 1} {'type': 'loss', 'content': 0.011304855346679688, 'timestamp': '2025-09-10 02:17:22.517108', 'step': 701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:22.547634', 'step': 701, 'epoch': 1} {'type': 'loss', 'content': 0.019669000059366226, 'timestamp': '2025-09-10 02:17:22.554496', 'step': 702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:22.585771', 'step': 702, 'epoch': 1} {'type': 'loss', 'content': 0.024419734254479408, 'timestamp': '2025-09-10 02:17:22.596278', 'step': 703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:22.627396', 'step': 703, 'epoch': 1} {'type': 'loss', 'content': 0.023848844692111015, 'timestamp': '2025-09-10 02:17:22.655573', 'step': 704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:22.686399', 'step': 704, 'epoch': 1} {'type': 'loss', 'content': 0.010360152460634708, 'timestamp': '2025-09-10 02:17:22.696638', 'step': 705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:22.727958', 'step': 705, 'epoch': 1} {'type': 'loss', 'content': 0.011346792802214622, 'timestamp': '2025-09-10 02:17:22.731910', 'step': 706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:22.762430', 'step': 706, 'epoch': 1} {'type': 'loss', 'content': 0.013497546315193176, 'timestamp': '2025-09-10 02:17:22.774546', 'step': 707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:17:22.807696', 'step': 707, 'epoch': 1} {'type': 'loss', 'content': 0.015288002789020538, 'timestamp': '2025-09-10 02:17:22.841930', 'step': 708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:22.872858', 'step': 708, 'epoch': 1} {'type': 'loss', 'content': 0.013141672126948833, 'timestamp': '2025-09-10 02:17:22.877838', 'step': 709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:22.907856', 'step': 709, 'epoch': 1} {'type': 'loss', 'content': 0.020733583718538284, 'timestamp': '2025-09-10 02:17:22.911893', 'step': 710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:17:22.944525', 'step': 710, 'epoch': 1} {'type': 'loss', 'content': 0.01969611644744873, 'timestamp': '2025-09-10 02:17:22.957824', 'step': 711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:17:22.989433', 'step': 711, 'epoch': 1} {'type': 'loss', 'content': 0.018604954704642296, 'timestamp': '2025-09-10 02:17:23.012821', 'step': 712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:23.043016', 'step': 712, 'epoch': 1} {'type': 'loss', 'content': 0.018361497670412064, 'timestamp': '2025-09-10 02:17:23.050764', 'step': 713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:23.081369', 'step': 713, 'epoch': 1} {'type': 'loss', 'content': 0.013966246508061886, 'timestamp': '2025-09-10 02:17:23.088194', 'step': 714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:23.117132', 'step': 714, 'epoch': 1} {'type': 'loss', 'content': 0.010636130347847939, 'timestamp': '2025-09-10 02:17:23.123967', 'step': 715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:23.154581', 'step': 715, 'epoch': 1} {'type': 'loss', 'content': 0.01876237615942955, 'timestamp': '2025-09-10 02:17:23.185152', 'step': 716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:17:23.217490', 'step': 716, 'epoch': 1} {'type': 'loss', 'content': 0.008436868898570538, 'timestamp': '2025-09-10 02:17:23.230458', 'step': 717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:23.260739', 'step': 717, 'epoch': 1} {'type': 'loss', 'content': 0.013500401750206947, 'timestamp': '2025-09-10 02:17:23.271538', 'step': 718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:17:23.310070', 'step': 718, 'epoch': 1} {'type': 'loss', 'content': 0.01989280991256237, 'timestamp': '2025-09-10 02:17:23.325658', 'step': 719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:23.357652', 'step': 719, 'epoch': 1} {'type': 'loss', 'content': 0.011787742376327515, 'timestamp': '2025-09-10 02:17:23.389493', 'step': 720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:23.419435', 'step': 720, 'epoch': 1} {'type': 'loss', 'content': 0.007818952202796936, 'timestamp': '2025-09-10 02:17:23.424154', 'step': 721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:23.455056', 'step': 721, 'epoch': 1} {'type': 'loss', 'content': 0.02153034135699272, 'timestamp': '2025-09-10 02:17:23.466054', 'step': 722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:23.496351', 'step': 722, 'epoch': 1} {'type': 'loss', 'content': 0.01763448491692543, 'timestamp': '2025-09-10 02:17:23.503135', 'step': 723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:23.533766', 'step': 723, 'epoch': 1} {'type': 'loss', 'content': 0.017756912857294083, 'timestamp': '2025-09-10 02:17:23.561649', 'step': 724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:23.592162', 'step': 724, 'epoch': 1} {'type': 'loss', 'content': 0.024670034646987915, 'timestamp': '2025-09-10 02:17:23.594202', 'step': 725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:23.623940', 'step': 725, 'epoch': 1} {'type': 'loss', 'content': 0.009984654374420643, 'timestamp': '2025-09-10 02:17:23.628162', 'step': 726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:23.659122', 'step': 726, 'epoch': 1} {'type': 'loss', 'content': 0.021593144163489342, 'timestamp': '2025-09-10 02:17:23.663205', 'step': 727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:23.693579', 'step': 727, 'epoch': 1} {'type': 'loss', 'content': 0.025460926815867424, 'timestamp': '2025-09-10 02:17:23.719090', 'step': 728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:17:23.755523', 'step': 728, 'epoch': 1} {'type': 'loss', 'content': 0.006311932113021612, 'timestamp': '2025-09-10 02:17:23.770962', 'step': 729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:23.801168', 'step': 729, 'epoch': 1} {'type': 'loss', 'content': 0.013910098932683468, 'timestamp': '2025-09-10 02:17:23.807934', 'step': 730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:23.839242', 'step': 730, 'epoch': 1} {'type': 'loss', 'content': 0.00481247017160058, 'timestamp': '2025-09-10 02:17:23.851799', 'step': 731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:23.883378', 'step': 731, 'epoch': 1} {'type': 'loss', 'content': 0.013029472902417183, 'timestamp': '2025-09-10 02:17:23.911603', 'step': 732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:23.943677', 'step': 732, 'epoch': 1} {'type': 'loss', 'content': 0.004847945179790258, 'timestamp': '2025-09-10 02:17:23.951290', 'step': 733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:23.982241', 'step': 733, 'epoch': 1} {'type': 'loss', 'content': 0.01138862781226635, 'timestamp': '2025-09-10 02:17:23.986032', 'step': 734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:24.017503', 'step': 734, 'epoch': 1} {'type': 'loss', 'content': 0.02151215262711048, 'timestamp': '2025-09-10 02:17:24.024494', 'step': 735, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:17:34.139298', 'step': 735, 'epoch': 1} {'type': 'pplx', 'content': 18458793.49173297, 'timestamp': '2025-09-10 02:17:34.143714', 'step': 735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:17:34.175489', 'step': 735, 'epoch': 1} {'type': 'loss', 'content': 0.01929275132715702, 'timestamp': '2025-09-10 02:17:34.209678', 'step': 736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:17:34.246127', 'step': 736, 'epoch': 1} {'type': 'loss', 'content': 0.013413517735898495, 'timestamp': '2025-09-10 02:17:34.261241', 'step': 737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:34.292957', 'step': 737, 'epoch': 1} {'type': 'loss', 'content': 0.0064349048770964146, 'timestamp': '2025-09-10 02:17:34.300048', 'step': 738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:34.330515', 'step': 738, 'epoch': 1} {'type': 'loss', 'content': 0.007773600518703461, 'timestamp': '2025-09-10 02:17:34.338056', 'step': 739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:17:34.369741', 'step': 739, 'epoch': 1} {'type': 'loss', 'content': 0.008945588953793049, 'timestamp': '2025-09-10 02:17:34.393563', 'step': 740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:34.423783', 'step': 740, 'epoch': 1} {'type': 'loss', 'content': 0.024763548746705055, 'timestamp': '2025-09-10 02:17:34.426047', 'step': 741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:34.456115', 'step': 741, 'epoch': 1} {'type': 'loss', 'content': 0.020738394930958748, 'timestamp': '2025-09-10 02:17:34.463004', 'step': 742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:34.493870', 'step': 742, 'epoch': 1} {'type': 'loss', 'content': 0.0035269984509795904, 'timestamp': '2025-09-10 02:17:34.497726', 'step': 743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:34.528175', 'step': 743, 'epoch': 1} {'type': 'loss', 'content': 0.030427515506744385, 'timestamp': '2025-09-10 02:17:34.553115', 'step': 744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:34.583845', 'step': 744, 'epoch': 1} {'type': 'loss', 'content': 0.007679258938878775, 'timestamp': '2025-09-10 02:17:34.586081', 'step': 745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:34.616894', 'step': 745, 'epoch': 1} {'type': 'loss', 'content': 0.028332481160759926, 'timestamp': '2025-09-10 02:17:34.629300', 'step': 746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:34.659701', 'step': 746, 'epoch': 1} {'type': 'loss', 'content': 0.008473207242786884, 'timestamp': '2025-09-10 02:17:34.666562', 'step': 747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:34.697133', 'step': 747, 'epoch': 1} {'type': 'loss', 'content': 0.017313247546553612, 'timestamp': '2025-09-10 02:17:34.728079', 'step': 748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:34.758141', 'step': 748, 'epoch': 1} {'type': 'loss', 'content': 0.013101726770401001, 'timestamp': '2025-09-10 02:17:34.762842', 'step': 749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:17:34.795984', 'step': 749, 'epoch': 1} {'type': 'loss', 'content': 0.0032856224570423365, 'timestamp': '2025-09-10 02:17:34.809377', 'step': 750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:34.839594', 'step': 750, 'epoch': 1} {'type': 'loss', 'content': 0.011557753197848797, 'timestamp': '2025-09-10 02:17:34.846505', 'step': 751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:34.877292', 'step': 751, 'epoch': 1} {'type': 'loss', 'content': 0.0019083227962255478, 'timestamp': '2025-09-10 02:17:34.904773', 'step': 752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:34.936422', 'step': 752, 'epoch': 1} {'type': 'loss', 'content': 0.015568030066788197, 'timestamp': '2025-09-10 02:17:34.943847', 'step': 753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:34.974977', 'step': 753, 'epoch': 1} {'type': 'loss', 'content': 0.004572854842990637, 'timestamp': '2025-09-10 02:17:34.985500', 'step': 754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:35.015995', 'step': 754, 'epoch': 1} {'type': 'loss', 'content': 0.000890351424459368, 'timestamp': '2025-09-10 02:17:35.023548', 'step': 755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:35.053850', 'step': 755, 'epoch': 1} {'type': 'loss', 'content': 0.01434353832155466, 'timestamp': '2025-09-10 02:17:35.079127', 'step': 756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:35.110114', 'step': 756, 'epoch': 1} {'type': 'loss', 'content': 0.016249870881438255, 'timestamp': '2025-09-10 02:17:35.114852', 'step': 757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:17:35.156515', 'step': 757, 'epoch': 1} {'type': 'loss', 'content': 0.038629692047834396, 'timestamp': '2025-09-10 02:17:35.173760', 'step': 758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:35.205032', 'step': 758, 'epoch': 1} {'type': 'loss', 'content': 0.05195966735482216, 'timestamp': '2025-09-10 02:17:35.217498', 'step': 759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:35.247998', 'step': 759, 'epoch': 1} {'type': 'loss', 'content': 0.03985142335295677, 'timestamp': '2025-09-10 02:17:35.275939', 'step': 760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:35.306453', 'step': 760, 'epoch': 1} {'type': 'loss', 'content': 0.012833379209041595, 'timestamp': '2025-09-10 02:17:35.310647', 'step': 761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:35.341151', 'step': 761, 'epoch': 1} {'type': 'loss', 'content': 0.018250539898872375, 'timestamp': '2025-09-10 02:17:35.353575', 'step': 762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:17:35.387132', 'step': 762, 'epoch': 1} {'type': 'loss', 'content': 0.006811958272010088, 'timestamp': '2025-09-10 02:17:35.401072', 'step': 763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:35.431899', 'step': 763, 'epoch': 1} {'type': 'loss', 'content': 0.01085501629859209, 'timestamp': '2025-09-10 02:17:35.459535', 'step': 764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:35.489598', 'step': 764, 'epoch': 1} {'type': 'loss', 'content': 0.034105248749256134, 'timestamp': '2025-09-10 02:17:35.494325', 'step': 765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:35.524429', 'step': 765, 'epoch': 1} {'type': 'loss', 'content': 0.007085829973220825, 'timestamp': '2025-09-10 02:17:35.531214', 'step': 766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:17:35.569083', 'step': 766, 'epoch': 1} {'type': 'loss', 'content': 0.0038453133311122656, 'timestamp': '2025-09-10 02:17:35.584662', 'step': 767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:35.616279', 'step': 767, 'epoch': 1} {'type': 'loss', 'content': 0.009612992405891418, 'timestamp': '2025-09-10 02:17:35.644013', 'step': 768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:35.675253', 'step': 768, 'epoch': 1} {'type': 'loss', 'content': 0.012257935479283333, 'timestamp': '2025-09-10 02:17:35.677227', 'step': 769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:35.708722', 'step': 769, 'epoch': 1} {'type': 'loss', 'content': 0.01698746345937252, 'timestamp': '2025-09-10 02:17:35.720306', 'step': 770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:35.751747', 'step': 770, 'epoch': 1} {'type': 'loss', 'content': 0.01926126517355442, 'timestamp': '2025-09-10 02:17:35.758927', 'step': 771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:35.791395', 'step': 771, 'epoch': 1} {'type': 'loss', 'content': 0.01315612904727459, 'timestamp': '2025-09-10 02:17:35.815732', 'step': 772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:35.847862', 'step': 772, 'epoch': 1} {'type': 'loss', 'content': 0.017368396744132042, 'timestamp': '2025-09-10 02:17:35.856892', 'step': 773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:35.892080', 'step': 773, 'epoch': 1} {'type': 'loss', 'content': 0.003305921098217368, 'timestamp': '2025-09-10 02:17:35.899155', 'step': 774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:35.935778', 'step': 774, 'epoch': 1} {'type': 'loss', 'content': 0.006261749658733606, 'timestamp': '2025-09-10 02:17:35.942911', 'step': 775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:35.989877', 'step': 775, 'epoch': 1} {'type': 'loss', 'content': 0.025168852880597115, 'timestamp': '2025-09-10 02:17:36.015181', 'step': 776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:36.052838', 'step': 776, 'epoch': 1} {'type': 'loss', 'content': 0.014582036063075066, 'timestamp': '2025-09-10 02:17:36.057215', 'step': 777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:36.095077', 'step': 777, 'epoch': 1} {'type': 'loss', 'content': 0.015908481553196907, 'timestamp': '2025-09-10 02:17:36.105369', 'step': 778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:36.136298', 'step': 778, 'epoch': 1} {'type': 'loss', 'content': 0.004383227322250605, 'timestamp': '2025-09-10 02:17:36.142909', 'step': 779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:36.173944', 'step': 779, 'epoch': 1} {'type': 'loss', 'content': 0.012968703173100948, 'timestamp': '2025-09-10 02:17:36.201145', 'step': 780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:36.231798', 'step': 780, 'epoch': 1} {'type': 'loss', 'content': 0.015527973882853985, 'timestamp': '2025-09-10 02:17:36.236691', 'step': 781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:17:36.271993', 'step': 781, 'epoch': 1} {'type': 'loss', 'content': 0.015053192153573036, 'timestamp': '2025-09-10 02:17:36.285682', 'step': 782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:36.317256', 'step': 782, 'epoch': 1} {'type': 'loss', 'content': 0.012528965249657631, 'timestamp': '2025-09-10 02:17:36.324049', 'step': 783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:17:36.362027', 'step': 783, 'epoch': 1} {'type': 'loss', 'content': 0.002056631725281477, 'timestamp': '2025-09-10 02:17:36.398818', 'step': 784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:36.430314', 'step': 784, 'epoch': 1} {'type': 'loss', 'content': 0.004613164346665144, 'timestamp': '2025-09-10 02:17:36.439249', 'step': 785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:17:36.477062', 'step': 785, 'epoch': 1} {'type': 'loss', 'content': 0.04229161515831947, 'timestamp': '2025-09-10 02:17:36.492661', 'step': 786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:36.523478', 'step': 786, 'epoch': 1} {'type': 'loss', 'content': 0.03430848568677902, 'timestamp': '2025-09-10 02:17:36.530252', 'step': 787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:17:36.563970', 'step': 787, 'epoch': 1} {'type': 'loss', 'content': 0.006993894465267658, 'timestamp': '2025-09-10 02:17:36.598606', 'step': 788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:36.632384', 'step': 788, 'epoch': 1} {'type': 'loss', 'content': 0.004820824600756168, 'timestamp': '2025-09-10 02:17:36.636484', 'step': 789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:36.667555', 'step': 789, 'epoch': 1} {'type': 'loss', 'content': 0.0256601981818676, 'timestamp': '2025-09-10 02:17:36.674098', 'step': 790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 15662185694400}, 'timestamp': '2025-09-10 02:17:36.721295', 'step': 790, 'epoch': 1} {'type': 'loss', 'content': 0.013889133930206299, 'timestamp': '2025-09-10 02:17:36.740383', 'step': 791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:36.771718', 'step': 791, 'epoch': 1} {'type': 'loss', 'content': 0.016334451735019684, 'timestamp': '2025-09-10 02:17:36.799678', 'step': 792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:36.830397', 'step': 792, 'epoch': 1} {'type': 'loss', 'content': 0.013368922285735607, 'timestamp': '2025-09-10 02:17:36.834885', 'step': 793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:36.867576', 'step': 793, 'epoch': 1} {'type': 'loss', 'content': 0.031976792961359024, 'timestamp': '2025-09-10 02:17:36.874661', 'step': 794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:36.911198', 'step': 794, 'epoch': 1} {'type': 'loss', 'content': 0.0013559797080233693, 'timestamp': '2025-09-10 02:17:36.918054', 'step': 795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:17:36.957012', 'step': 795, 'epoch': 1} {'type': 'loss', 'content': 0.04322435334324837, 'timestamp': '2025-09-10 02:17:36.991235', 'step': 796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:17:37.031780', 'step': 796, 'epoch': 1} {'type': 'loss', 'content': 0.0115485405549407, 'timestamp': '2025-09-10 02:17:37.044793', 'step': 797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:37.083728', 'step': 797, 'epoch': 1} {'type': 'loss', 'content': 0.003736104816198349, 'timestamp': '2025-09-10 02:17:37.090801', 'step': 798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:37.124580', 'step': 798, 'epoch': 1} {'type': 'loss', 'content': 0.014022842049598694, 'timestamp': '2025-09-10 02:17:37.128190', 'step': 799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:37.159139', 'step': 799, 'epoch': 1} {'type': 'loss', 'content': 0.013005274347960949, 'timestamp': '2025-09-10 02:17:37.182616', 'step': 800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:17:37.213215', 'step': 800, 'epoch': 1} {'type': 'loss', 'content': 0.02133549191057682, 'timestamp': '2025-09-10 02:17:37.215490', 'step': 801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:37.247157', 'step': 801, 'epoch': 1} {'type': 'loss', 'content': 0.0029299429152160883, 'timestamp': '2025-09-10 02:17:37.259354', 'step': 802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:37.295020', 'step': 802, 'epoch': 1} {'type': 'loss', 'content': 0.0014492860063910484, 'timestamp': '2025-09-10 02:17:37.297558', 'step': 803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:17:37.333756', 'step': 803, 'epoch': 1} {'type': 'loss', 'content': 0.021839609369635582, 'timestamp': '2025-09-10 02:17:37.368272', 'step': 804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:37.400485', 'step': 804, 'epoch': 1} {'type': 'loss', 'content': 0.030333133414387703, 'timestamp': '2025-09-10 02:17:37.404770', 'step': 805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:37.437794', 'step': 805, 'epoch': 1} {'type': 'loss', 'content': 0.006924864836037159, 'timestamp': '2025-09-10 02:17:37.448090', 'step': 806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:37.479748', 'step': 806, 'epoch': 1} {'type': 'loss', 'content': 0.0072951540350914, 'timestamp': '2025-09-10 02:17:37.486157', 'step': 807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:37.517782', 'step': 807, 'epoch': 1} {'type': 'loss', 'content': 0.006309094373136759, 'timestamp': '2025-09-10 02:17:37.545367', 'step': 808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:37.580837', 'step': 808, 'epoch': 1} {'type': 'loss', 'content': 0.00040522878407500684, 'timestamp': '2025-09-10 02:17:37.585120', 'step': 809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:37.617472', 'step': 809, 'epoch': 1} {'type': 'loss', 'content': 0.006362107116729021, 'timestamp': '2025-09-10 02:17:37.619848', 'step': 810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:37.651402', 'step': 810, 'epoch': 1} {'type': 'loss', 'content': 0.008096226491034031, 'timestamp': '2025-09-10 02:17:37.658397', 'step': 811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:17:37.691987', 'step': 811, 'epoch': 1} {'type': 'loss', 'content': 0.007765918970108032, 'timestamp': '2025-09-10 02:17:37.726560', 'step': 812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:37.758589', 'step': 812, 'epoch': 1} {'type': 'loss', 'content': 0.019435886293649673, 'timestamp': '2025-09-10 02:17:37.762951', 'step': 813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:37.794126', 'step': 813, 'epoch': 1} {'type': 'loss', 'content': 0.009066428057849407, 'timestamp': '2025-09-10 02:17:37.797550', 'step': 814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:37.828748', 'step': 814, 'epoch': 1} {'type': 'loss', 'content': 0.0014025976415723562, 'timestamp': '2025-09-10 02:17:37.838601', 'step': 815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:37.870535', 'step': 815, 'epoch': 1} {'type': 'loss', 'content': 0.023165103048086166, 'timestamp': '2025-09-10 02:17:37.900977', 'step': 816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:17:37.933424', 'step': 816, 'epoch': 1} {'type': 'loss', 'content': 0.012905867770314217, 'timestamp': '2025-09-10 02:17:37.946463', 'step': 817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:17:37.979295', 'step': 817, 'epoch': 1} {'type': 'loss', 'content': 0.023732444271445274, 'timestamp': '2025-09-10 02:17:37.992647', 'step': 818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:38.024568', 'step': 818, 'epoch': 1} {'type': 'loss', 'content': 0.03576406463980675, 'timestamp': '2025-09-10 02:17:38.033872', 'step': 819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:38.064401', 'step': 819, 'epoch': 1} {'type': 'loss', 'content': 0.039100583642721176, 'timestamp': '2025-09-10 02:17:38.089401', 'step': 820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:38.120591', 'step': 820, 'epoch': 1} {'type': 'loss', 'content': 0.019176315516233444, 'timestamp': '2025-09-10 02:17:38.124732', 'step': 821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:38.156050', 'step': 821, 'epoch': 1} {'type': 'loss', 'content': 0.06485612690448761, 'timestamp': '2025-09-10 02:17:38.163315', 'step': 822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:17:38.193669', 'step': 822, 'epoch': 1} {'type': 'loss', 'content': 0.0012710961746051908, 'timestamp': '2025-09-10 02:17:38.197109', 'step': 823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:38.227795', 'step': 823, 'epoch': 1} {'type': 'loss', 'content': 0.002674340968951583, 'timestamp': '2025-09-10 02:17:38.252966', 'step': 824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:38.284796', 'step': 824, 'epoch': 1} {'type': 'loss', 'content': 0.005030173342674971, 'timestamp': '2025-09-10 02:17:38.291925', 'step': 825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:38.324273', 'step': 825, 'epoch': 1} {'type': 'loss', 'content': 0.033865850418806076, 'timestamp': '2025-09-10 02:17:38.327924', 'step': 826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:38.359190', 'step': 826, 'epoch': 1} {'type': 'loss', 'content': 0.051615625619888306, 'timestamp': '2025-09-10 02:17:38.363253', 'step': 827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:38.393455', 'step': 827, 'epoch': 1} {'type': 'loss', 'content': 0.005924302618950605, 'timestamp': '2025-09-10 02:17:38.421082', 'step': 828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:38.452362', 'step': 828, 'epoch': 1} {'type': 'loss', 'content': 0.009351923130452633, 'timestamp': '2025-09-10 02:17:38.461365', 'step': 829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:38.492734', 'step': 829, 'epoch': 1} {'type': 'loss', 'content': 0.014958539046347141, 'timestamp': '2025-09-10 02:17:38.500339', 'step': 830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:38.532470', 'step': 830, 'epoch': 1} {'type': 'loss', 'content': 0.009349385276436806, 'timestamp': '2025-09-10 02:17:38.538976', 'step': 831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:38.570077', 'step': 831, 'epoch': 1} {'type': 'loss', 'content': 0.026025842875242233, 'timestamp': '2025-09-10 02:17:38.603218', 'step': 832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:38.635495', 'step': 832, 'epoch': 1} {'type': 'loss', 'content': 0.018966345116496086, 'timestamp': '2025-09-10 02:17:38.642712', 'step': 833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:38.674175', 'step': 833, 'epoch': 1} {'type': 'loss', 'content': 0.04127897694706917, 'timestamp': '2025-09-10 02:17:38.680832', 'step': 834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:38.712426', 'step': 834, 'epoch': 1} {'type': 'loss', 'content': 0.04661082848906517, 'timestamp': '2025-09-10 02:17:38.719223', 'step': 835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:38.751110', 'step': 835, 'epoch': 1} {'type': 'loss', 'content': 0.022765228524804115, 'timestamp': '2025-09-10 02:17:38.778922', 'step': 836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:38.809955', 'step': 836, 'epoch': 1} {'type': 'loss', 'content': 0.01644345186650753, 'timestamp': '2025-09-10 02:17:38.818849', 'step': 837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:17:38.857840', 'step': 837, 'epoch': 1} {'type': 'loss', 'content': 0.046977002173662186, 'timestamp': '2025-09-10 02:17:38.873689', 'step': 838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:38.905257', 'step': 838, 'epoch': 1} {'type': 'loss', 'content': 0.02852710708975792, 'timestamp': '2025-09-10 02:17:38.909440', 'step': 839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:38.939934', 'step': 839, 'epoch': 1} {'type': 'loss', 'content': 0.04006481543183327, 'timestamp': '2025-09-10 02:17:38.967495', 'step': 840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:17:39.007106', 'step': 840, 'epoch': 1} {'type': 'loss', 'content': 0.026953106746077538, 'timestamp': '2025-09-10 02:17:39.023998', 'step': 841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:39.054804', 'step': 841, 'epoch': 1} {'type': 'loss', 'content': 0.010299399495124817, 'timestamp': '2025-09-10 02:17:39.061572', 'step': 842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:39.091752', 'step': 842, 'epoch': 1} {'type': 'loss', 'content': 0.018122700974345207, 'timestamp': '2025-09-10 02:17:39.094076', 'step': 843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:39.125506', 'step': 843, 'epoch': 1} {'type': 'loss', 'content': 0.015159577131271362, 'timestamp': '2025-09-10 02:17:39.153533', 'step': 844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:17:39.190048', 'step': 844, 'epoch': 1} {'type': 'loss', 'content': 0.017123881727457047, 'timestamp': '2025-09-10 02:17:39.205650', 'step': 845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:17:39.240889', 'step': 845, 'epoch': 1} {'type': 'loss', 'content': 0.005689023993909359, 'timestamp': '2025-09-10 02:17:39.254559', 'step': 846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:17:39.288982', 'step': 846, 'epoch': 1} {'type': 'loss', 'content': 0.016331713646650314, 'timestamp': '2025-09-10 02:17:39.302923', 'step': 847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:39.335924', 'step': 847, 'epoch': 1} {'type': 'loss', 'content': 0.022277653217315674, 'timestamp': '2025-09-10 02:17:39.360399', 'step': 848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:39.392440', 'step': 848, 'epoch': 1} {'type': 'loss', 'content': 0.027887245640158653, 'timestamp': '2025-09-10 02:17:39.394610', 'step': 849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:39.425130', 'step': 849, 'epoch': 1} {'type': 'loss', 'content': 0.006013993173837662, 'timestamp': '2025-09-10 02:17:39.435458', 'step': 850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:39.466820', 'step': 850, 'epoch': 1} {'type': 'loss', 'content': 0.00961573701351881, 'timestamp': '2025-09-10 02:17:39.473322', 'step': 851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:39.504528', 'step': 851, 'epoch': 1} {'type': 'loss', 'content': 0.028055304661393166, 'timestamp': '2025-09-10 02:17:39.534905', 'step': 852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:17:39.574164', 'step': 852, 'epoch': 1} {'type': 'loss', 'content': 0.010051725432276726, 'timestamp': '2025-09-10 02:17:39.591067', 'step': 853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:17:39.630072', 'step': 853, 'epoch': 1} {'type': 'loss', 'content': 0.008215261623263359, 'timestamp': '2025-09-10 02:17:39.645899', 'step': 854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:39.677467', 'step': 854, 'epoch': 1} {'type': 'loss', 'content': 0.014234711416065693, 'timestamp': '2025-09-10 02:17:39.683693', 'step': 855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:39.715222', 'step': 855, 'epoch': 1} {'type': 'loss', 'content': 0.011703640222549438, 'timestamp': '2025-09-10 02:17:39.745554', 'step': 856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:39.777271', 'step': 856, 'epoch': 1} {'type': 'loss', 'content': 0.010386110283434391, 'timestamp': '2025-09-10 02:17:39.786202', 'step': 857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:39.818376', 'step': 857, 'epoch': 1} {'type': 'loss', 'content': 0.026391830295324326, 'timestamp': '2025-09-10 02:17:39.825003', 'step': 858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:39.856397', 'step': 858, 'epoch': 1} {'type': 'loss', 'content': 0.02559771202504635, 'timestamp': '2025-09-10 02:17:39.862969', 'step': 859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:17:39.899353', 'step': 859, 'epoch': 1} {'type': 'loss', 'content': 0.019128460437059402, 'timestamp': '2025-09-10 02:17:39.934178', 'step': 860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:17:39.966353', 'step': 860, 'epoch': 1} {'type': 'loss', 'content': 0.00833536684513092, 'timestamp': '2025-09-10 02:17:39.978583', 'step': 861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:40.009753', 'step': 861, 'epoch': 1} {'type': 'loss', 'content': 0.012589543126523495, 'timestamp': '2025-09-10 02:17:40.016543', 'step': 862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:40.049684', 'step': 862, 'epoch': 1} {'type': 'loss', 'content': 0.006604044698178768, 'timestamp': '2025-09-10 02:17:40.053898', 'step': 863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:40.086709', 'step': 863, 'epoch': 1} {'type': 'loss', 'content': 0.02710030786693096, 'timestamp': '2025-09-10 02:17:40.114207', 'step': 864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:40.145830', 'step': 864, 'epoch': 1} {'type': 'loss', 'content': 0.006634250283241272, 'timestamp': '2025-09-10 02:17:40.153831', 'step': 865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:40.184219', 'step': 865, 'epoch': 1} {'type': 'loss', 'content': 0.03137756139039993, 'timestamp': '2025-09-10 02:17:40.190972', 'step': 866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:40.222239', 'step': 866, 'epoch': 1} {'type': 'loss', 'content': 0.02019382454454899, 'timestamp': '2025-09-10 02:17:40.229805', 'step': 867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:40.260687', 'step': 867, 'epoch': 1} {'type': 'loss', 'content': 0.00791159924119711, 'timestamp': '2025-09-10 02:17:40.288245', 'step': 868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:40.318958', 'step': 868, 'epoch': 1} {'type': 'loss', 'content': 0.005334521643817425, 'timestamp': '2025-09-10 02:17:40.323049', 'step': 869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:40.354600', 'step': 869, 'epoch': 1} {'type': 'loss', 'content': 0.009703114628791809, 'timestamp': '2025-09-10 02:17:40.358892', 'step': 870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:40.390479', 'step': 870, 'epoch': 1} {'type': 'loss', 'content': 0.029153967276215553, 'timestamp': '2025-09-10 02:17:40.397821', 'step': 871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:40.428409', 'step': 871, 'epoch': 1} {'type': 'loss', 'content': 0.007150101009756327, 'timestamp': '2025-09-10 02:17:40.451663', 'step': 872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:40.482550', 'step': 872, 'epoch': 1} {'type': 'loss', 'content': 0.008403966203331947, 'timestamp': '2025-09-10 02:17:40.486683', 'step': 873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:17:40.519587', 'step': 873, 'epoch': 1} {'type': 'loss', 'content': 0.029630528762936592, 'timestamp': '2025-09-10 02:17:40.532895', 'step': 874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:40.564255', 'step': 874, 'epoch': 1} {'type': 'loss', 'content': 0.011236722581088543, 'timestamp': '2025-09-10 02:17:40.571169', 'step': 875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:40.602266', 'step': 875, 'epoch': 1} {'type': 'loss', 'content': 0.01793195679783821, 'timestamp': '2025-09-10 02:17:40.630123', 'step': 876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:40.660927', 'step': 876, 'epoch': 1} {'type': 'loss', 'content': 0.004515457898378372, 'timestamp': '2025-09-10 02:17:40.663538', 'step': 877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:40.694461', 'step': 877, 'epoch': 1} {'type': 'loss', 'content': 0.01135172974318266, 'timestamp': '2025-09-10 02:17:40.698873', 'step': 878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:40.729815', 'step': 878, 'epoch': 1} {'type': 'loss', 'content': 0.00693327933549881, 'timestamp': '2025-09-10 02:17:40.739606', 'step': 879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:40.771350', 'step': 879, 'epoch': 1} {'type': 'loss', 'content': 0.017033789306879044, 'timestamp': '2025-09-10 02:17:40.799018', 'step': 880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:40.829888', 'step': 880, 'epoch': 1} {'type': 'loss', 'content': 0.012997663579881191, 'timestamp': '2025-09-10 02:17:40.839821', 'step': 881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:40.870538', 'step': 881, 'epoch': 1} {'type': 'loss', 'content': 0.009415126405656338, 'timestamp': '2025-09-10 02:17:40.881247', 'step': 882, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:17:51.050797', 'step': 882, 'epoch': 1} {'type': 'pplx', 'content': 14730864.383457733, 'timestamp': '2025-09-10 02:17:51.055567', 'step': 882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:51.091661', 'step': 882, 'epoch': 1} {'type': 'loss', 'content': 0.022233616560697556, 'timestamp': '2025-09-10 02:17:51.099797', 'step': 883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:51.135781', 'step': 883, 'epoch': 1} {'type': 'loss', 'content': 0.009659935720264912, 'timestamp': '2025-09-10 02:17:51.163204', 'step': 884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:51.201257', 'step': 884, 'epoch': 1} {'type': 'loss', 'content': 0.030786585062742233, 'timestamp': '2025-09-10 02:17:51.207893', 'step': 885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:51.238953', 'step': 885, 'epoch': 1} {'type': 'loss', 'content': 0.021192189306020737, 'timestamp': '2025-09-10 02:17:51.246685', 'step': 886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 784], 'flops': 23255845310656}, 'timestamp': '2025-09-10 02:17:51.318129', 'step': 886, 'epoch': 1} {'type': 'loss', 'content': 0.04029746726155281, 'timestamp': '2025-09-10 02:17:51.345232', 'step': 887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:51.376862', 'step': 887, 'epoch': 1} {'type': 'loss', 'content': 0.006447978317737579, 'timestamp': '2025-09-10 02:17:51.410235', 'step': 888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:51.441644', 'step': 888, 'epoch': 1} {'type': 'loss', 'content': 0.02322995476424694, 'timestamp': '2025-09-10 02:17:51.443876', 'step': 889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:51.474542', 'step': 889, 'epoch': 1} {'type': 'loss', 'content': 0.010987287387251854, 'timestamp': '2025-09-10 02:17:51.485276', 'step': 890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:51.516512', 'step': 890, 'epoch': 1} {'type': 'loss', 'content': 0.02331923507153988, 'timestamp': '2025-09-10 02:17:51.526594', 'step': 891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:17:51.565807', 'step': 891, 'epoch': 1} {'type': 'loss', 'content': 0.003565514227375388, 'timestamp': '2025-09-10 02:17:51.602876', 'step': 892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:51.633770', 'step': 892, 'epoch': 1} {'type': 'loss', 'content': 0.04227833077311516, 'timestamp': '2025-09-10 02:17:51.636076', 'step': 893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:51.667184', 'step': 893, 'epoch': 1} {'type': 'loss', 'content': 0.021429577842354774, 'timestamp': '2025-09-10 02:17:51.674087', 'step': 894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:51.705630', 'step': 894, 'epoch': 1} {'type': 'loss', 'content': 0.03835199028253555, 'timestamp': '2025-09-10 02:17:51.713251', 'step': 895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:51.743135', 'step': 895, 'epoch': 1} {'type': 'loss', 'content': 0.0153651786968112, 'timestamp': '2025-09-10 02:17:51.768400', 'step': 896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:51.801241', 'step': 896, 'epoch': 1} {'type': 'loss', 'content': 0.0387214757502079, 'timestamp': '2025-09-10 02:17:51.803342', 'step': 897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:51.832852', 'step': 897, 'epoch': 1} {'type': 'loss', 'content': 0.0020098849199712276, 'timestamp': '2025-09-10 02:17:51.837127', 'step': 898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:51.867212', 'step': 898, 'epoch': 1} {'type': 'loss', 'content': 0.009437446482479572, 'timestamp': '2025-09-10 02:17:51.875034', 'step': 899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:51.906254', 'step': 899, 'epoch': 1} {'type': 'loss', 'content': 0.025736164301633835, 'timestamp': '2025-09-10 02:17:51.934806', 'step': 900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:51.966020', 'step': 900, 'epoch': 1} {'type': 'loss', 'content': 0.001165196648798883, 'timestamp': '2025-09-10 02:17:51.968310', 'step': 901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:52.000301', 'step': 901, 'epoch': 1} {'type': 'loss', 'content': 0.019891690462827682, 'timestamp': '2025-09-10 02:17:52.008143', 'step': 902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:52.039005', 'step': 902, 'epoch': 1} {'type': 'loss', 'content': 0.03028137981891632, 'timestamp': '2025-09-10 02:17:52.045936', 'step': 903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:17:52.084242', 'step': 903, 'epoch': 1} {'type': 'loss', 'content': 0.0014906581491231918, 'timestamp': '2025-09-10 02:17:52.120703', 'step': 904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:52.151409', 'step': 904, 'epoch': 1} {'type': 'loss', 'content': 0.009351144544780254, 'timestamp': '2025-09-10 02:17:52.159191', 'step': 905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:52.189988', 'step': 905, 'epoch': 1} {'type': 'loss', 'content': 0.032125215977430344, 'timestamp': '2025-09-10 02:17:52.193955', 'step': 906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 14712978242368}, 'timestamp': '2025-09-10 02:17:52.237212', 'step': 906, 'epoch': 1} {'type': 'loss', 'content': 0.02834523655474186, 'timestamp': '2025-09-10 02:17:52.254904', 'step': 907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:17:52.290019', 'step': 907, 'epoch': 1} {'type': 'loss', 'content': 0.011470122262835503, 'timestamp': '2025-09-10 02:17:52.324553', 'step': 908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:52.357249', 'step': 908, 'epoch': 1} {'type': 'loss', 'content': 0.03763606771826744, 'timestamp': '2025-09-10 02:17:52.359462', 'step': 909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:52.390369', 'step': 909, 'epoch': 1} {'type': 'loss', 'content': 0.0020886852871626616, 'timestamp': '2025-09-10 02:17:52.397115', 'step': 910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:52.428896', 'step': 910, 'epoch': 1} {'type': 'loss', 'content': 0.00491158664226532, 'timestamp': '2025-09-10 02:17:52.436093', 'step': 911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:52.466335', 'step': 911, 'epoch': 1} {'type': 'loss', 'content': 0.010647162795066833, 'timestamp': '2025-09-10 02:17:52.494032', 'step': 912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:52.525074', 'step': 912, 'epoch': 1} {'type': 'loss', 'content': 0.02704322710633278, 'timestamp': '2025-09-10 02:17:52.527308', 'step': 913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:52.558028', 'step': 913, 'epoch': 1} {'type': 'loss', 'content': 0.03703900799155235, 'timestamp': '2025-09-10 02:17:52.565527', 'step': 914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:52.595613', 'step': 914, 'epoch': 1} {'type': 'loss', 'content': 0.05229032784700394, 'timestamp': '2025-09-10 02:17:52.603099', 'step': 915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:52.632844', 'step': 915, 'epoch': 1} {'type': 'loss', 'content': 0.02464185282588005, 'timestamp': '2025-09-10 02:17:52.657740', 'step': 916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:52.687050', 'step': 916, 'epoch': 1} {'type': 'loss', 'content': 0.029760537669062614, 'timestamp': '2025-09-10 02:17:52.688949', 'step': 917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:52.719815', 'step': 917, 'epoch': 1} {'type': 'loss', 'content': 0.023682432249188423, 'timestamp': '2025-09-10 02:17:52.727378', 'step': 918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:52.758497', 'step': 918, 'epoch': 1} {'type': 'loss', 'content': 0.0028019100427627563, 'timestamp': '2025-09-10 02:17:52.766244', 'step': 919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:52.796453', 'step': 919, 'epoch': 1} {'type': 'loss', 'content': 0.013780993409454823, 'timestamp': '2025-09-10 02:17:52.824088', 'step': 920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:52.854433', 'step': 920, 'epoch': 1} {'type': 'loss', 'content': 0.003197154263034463, 'timestamp': '2025-09-10 02:17:52.858986', 'step': 921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:52.890585', 'step': 921, 'epoch': 1} {'type': 'loss', 'content': 0.002631398383527994, 'timestamp': '2025-09-10 02:17:52.903136', 'step': 922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:52.934809', 'step': 922, 'epoch': 1} {'type': 'loss', 'content': 0.01756918616592884, 'timestamp': '2025-09-10 02:17:52.945697', 'step': 923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:52.976339', 'step': 923, 'epoch': 1} {'type': 'loss', 'content': 0.022316312417387962, 'timestamp': '2025-09-10 02:17:53.001800', 'step': 924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:53.033155', 'step': 924, 'epoch': 1} {'type': 'loss', 'content': 0.018341967836022377, 'timestamp': '2025-09-10 02:17:53.040820', 'step': 925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:17:53.075483', 'step': 925, 'epoch': 1} {'type': 'loss', 'content': 0.007413599174469709, 'timestamp': '2025-09-10 02:17:53.089315', 'step': 926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:53.119379', 'step': 926, 'epoch': 1} {'type': 'loss', 'content': 0.02032196894288063, 'timestamp': '2025-09-10 02:17:53.126669', 'step': 927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:53.157084', 'step': 927, 'epoch': 1} {'type': 'loss', 'content': 0.007340454496443272, 'timestamp': '2025-09-10 02:17:53.185787', 'step': 928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:53.218495', 'step': 928, 'epoch': 1} {'type': 'loss', 'content': 0.017748655751347542, 'timestamp': '2025-09-10 02:17:53.226305', 'step': 929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:53.256717', 'step': 929, 'epoch': 1} {'type': 'loss', 'content': 0.042677875608205795, 'timestamp': '2025-09-10 02:17:53.260877', 'step': 930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:53.291560', 'step': 930, 'epoch': 1} {'type': 'loss', 'content': 0.009248084388673306, 'timestamp': '2025-09-10 02:17:53.299141', 'step': 931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:53.330419', 'step': 931, 'epoch': 1} {'type': 'loss', 'content': 0.015127205289900303, 'timestamp': '2025-09-10 02:17:53.361965', 'step': 932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:53.392356', 'step': 932, 'epoch': 1} {'type': 'loss', 'content': 0.02554660104215145, 'timestamp': '2025-09-10 02:17:53.394539', 'step': 933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:53.425626', 'step': 933, 'epoch': 1} {'type': 'loss', 'content': 0.018056869506835938, 'timestamp': '2025-09-10 02:17:53.437773', 'step': 934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:53.468052', 'step': 934, 'epoch': 1} {'type': 'loss', 'content': 0.039137158542871475, 'timestamp': '2025-09-10 02:17:53.475535', 'step': 935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:53.505811', 'step': 935, 'epoch': 1} {'type': 'loss', 'content': 0.03655305504798889, 'timestamp': '2025-09-10 02:17:53.530566', 'step': 936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:53.563734', 'step': 936, 'epoch': 1} {'type': 'loss', 'content': 0.02264043502509594, 'timestamp': '2025-09-10 02:17:53.571984', 'step': 937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:53.603803', 'step': 937, 'epoch': 1} {'type': 'loss', 'content': 0.0072896406054496765, 'timestamp': '2025-09-10 02:17:53.607995', 'step': 938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:53.639895', 'step': 938, 'epoch': 1} {'type': 'loss', 'content': 0.01063856016844511, 'timestamp': '2025-09-10 02:17:53.646943', 'step': 939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:53.677247', 'step': 939, 'epoch': 1} {'type': 'loss', 'content': 0.012549477629363537, 'timestamp': '2025-09-10 02:17:53.705519', 'step': 940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:17:53.742721', 'step': 940, 'epoch': 1} {'type': 'loss', 'content': 0.007854852825403214, 'timestamp': '2025-09-10 02:17:53.758153', 'step': 941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:53.789869', 'step': 941, 'epoch': 1} {'type': 'loss', 'content': 0.008021929301321507, 'timestamp': '2025-09-10 02:17:53.797362', 'step': 942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:53.828747', 'step': 942, 'epoch': 1} {'type': 'loss', 'content': 0.03075227700173855, 'timestamp': '2025-09-10 02:17:53.836399', 'step': 943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:53.867457', 'step': 943, 'epoch': 1} {'type': 'loss', 'content': 0.012394532561302185, 'timestamp': '2025-09-10 02:17:53.892784', 'step': 944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:53.924195', 'step': 944, 'epoch': 1} {'type': 'loss', 'content': 0.025795314460992813, 'timestamp': '2025-09-10 02:17:53.928702', 'step': 945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:53.965401', 'step': 945, 'epoch': 1} {'type': 'loss', 'content': 0.03643295168876648, 'timestamp': '2025-09-10 02:17:53.972943', 'step': 946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:54.012422', 'step': 946, 'epoch': 1} {'type': 'loss', 'content': 0.01099133025854826, 'timestamp': '2025-09-10 02:17:54.019892', 'step': 947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:54.055067', 'step': 947, 'epoch': 1} {'type': 'loss', 'content': 0.010035491548478603, 'timestamp': '2025-09-10 02:17:54.082908', 'step': 948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:54.130125', 'step': 948, 'epoch': 1} {'type': 'loss', 'content': 0.018783031031489372, 'timestamp': '2025-09-10 02:17:54.135462', 'step': 949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:54.172827', 'step': 949, 'epoch': 1} {'type': 'loss', 'content': 0.0065679592080414295, 'timestamp': '2025-09-10 02:17:54.179688', 'step': 950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:54.227461', 'step': 950, 'epoch': 1} {'type': 'loss', 'content': 0.030873224139213562, 'timestamp': '2025-09-10 02:17:54.234850', 'step': 951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:54.267001', 'step': 951, 'epoch': 1} {'type': 'loss', 'content': 0.00860142894089222, 'timestamp': '2025-09-10 02:17:54.298805', 'step': 952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:54.329849', 'step': 952, 'epoch': 1} {'type': 'loss', 'content': 0.02184119261801243, 'timestamp': '2025-09-10 02:17:54.334802', 'step': 953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:54.365625', 'step': 953, 'epoch': 1} {'type': 'loss', 'content': 0.009527009911835194, 'timestamp': '2025-09-10 02:17:54.375907', 'step': 954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:17:54.408819', 'step': 954, 'epoch': 1} {'type': 'loss', 'content': 0.020068276673555374, 'timestamp': '2025-09-10 02:17:54.422151', 'step': 955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:54.457236', 'step': 955, 'epoch': 1} {'type': 'loss', 'content': 0.010814903303980827, 'timestamp': '2025-09-10 02:17:54.488267', 'step': 956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:54.520071', 'step': 956, 'epoch': 1} {'type': 'loss', 'content': 0.02371845953166485, 'timestamp': '2025-09-10 02:17:54.525943', 'step': 957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:54.565905', 'step': 957, 'epoch': 1} {'type': 'loss', 'content': 0.029727578163146973, 'timestamp': '2025-09-10 02:17:54.571947', 'step': 958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:54.603808', 'step': 958, 'epoch': 1} {'type': 'loss', 'content': 0.05049288645386696, 'timestamp': '2025-09-10 02:17:54.614183', 'step': 959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:54.646310', 'step': 959, 'epoch': 1} {'type': 'loss', 'content': 0.00983439851552248, 'timestamp': '2025-09-10 02:17:54.671570', 'step': 960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:54.705032', 'step': 960, 'epoch': 1} {'type': 'loss', 'content': 0.02309414930641651, 'timestamp': '2025-09-10 02:17:54.711307', 'step': 961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:17:54.751183', 'step': 961, 'epoch': 1} {'type': 'loss', 'content': 0.0041077896021306515, 'timestamp': '2025-09-10 02:17:54.765144', 'step': 962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:54.798115', 'step': 962, 'epoch': 1} {'type': 'loss', 'content': 0.0642273798584938, 'timestamp': '2025-09-10 02:17:54.810293', 'step': 963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:17:54.845738', 'step': 963, 'epoch': 1} {'type': 'loss', 'content': 0.007594208233058453, 'timestamp': '2025-09-10 02:17:54.880449', 'step': 964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:54.910901', 'step': 964, 'epoch': 1} {'type': 'loss', 'content': 0.021292701363563538, 'timestamp': '2025-09-10 02:17:54.915566', 'step': 965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:54.947901', 'step': 965, 'epoch': 1} {'type': 'loss', 'content': 0.00916915200650692, 'timestamp': '2025-09-10 02:17:54.951819', 'step': 966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:54.983075', 'step': 966, 'epoch': 1} {'type': 'loss', 'content': 0.006665355525910854, 'timestamp': '2025-09-10 02:17:54.990165', 'step': 967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:55.025106', 'step': 967, 'epoch': 1} {'type': 'loss', 'content': 0.012122230604290962, 'timestamp': '2025-09-10 02:17:55.058652', 'step': 968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 576], 'flops': 17085996872448}, 'timestamp': '2025-09-10 02:17:55.103815', 'step': 968, 'epoch': 1} {'type': 'loss', 'content': 0.0122977988794446, 'timestamp': '2025-09-10 02:17:55.123080', 'step': 969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:55.154056', 'step': 969, 'epoch': 1} {'type': 'loss', 'content': 0.00949710514396429, 'timestamp': '2025-09-10 02:17:55.164111', 'step': 970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:55.194022', 'step': 970, 'epoch': 1} {'type': 'loss', 'content': 0.018477456644177437, 'timestamp': '2025-09-10 02:17:55.198355', 'step': 971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:55.229368', 'step': 971, 'epoch': 1} {'type': 'loss', 'content': 0.014558763243258, 'timestamp': '2025-09-10 02:17:55.257943', 'step': 972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:17:55.294468', 'step': 972, 'epoch': 1} {'type': 'loss', 'content': 0.008184276521205902, 'timestamp': '2025-09-10 02:17:55.307496', 'step': 973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:17:55.343894', 'step': 973, 'epoch': 1} {'type': 'loss', 'content': 0.017617663368582726, 'timestamp': '2025-09-10 02:17:55.349012', 'step': 974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:55.385405', 'step': 974, 'epoch': 1} {'type': 'loss', 'content': 0.013045444153249264, 'timestamp': '2025-09-10 02:17:55.389583', 'step': 975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:55.421516', 'step': 975, 'epoch': 1} {'type': 'loss', 'content': 0.02563711628317833, 'timestamp': '2025-09-10 02:17:55.451436', 'step': 976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:55.481948', 'step': 976, 'epoch': 1} {'type': 'loss', 'content': 0.013601159676909447, 'timestamp': '2025-09-10 02:17:55.484011', 'step': 977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:17:55.514755', 'step': 977, 'epoch': 1} {'type': 'loss', 'content': 0.026903489604592323, 'timestamp': '2025-09-10 02:17:55.517153', 'step': 978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:55.548412', 'step': 978, 'epoch': 1} {'type': 'loss', 'content': 0.008285568095743656, 'timestamp': '2025-09-10 02:17:55.552794', 'step': 979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:17:55.584495', 'step': 979, 'epoch': 1} {'type': 'loss', 'content': 0.008680049329996109, 'timestamp': '2025-09-10 02:17:55.615365', 'step': 980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:55.646309', 'step': 980, 'epoch': 1} {'type': 'loss', 'content': 0.004374523181468248, 'timestamp': '2025-09-10 02:17:55.651294', 'step': 981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:17:55.682655', 'step': 981, 'epoch': 1} {'type': 'loss', 'content': 0.018112564459443092, 'timestamp': '2025-09-10 02:17:55.693510', 'step': 982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:17:55.723932', 'step': 982, 'epoch': 1} {'type': 'loss', 'content': 0.017343124374747276, 'timestamp': '2025-09-10 02:17:55.728098', 'step': 983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:55.757844', 'step': 983, 'epoch': 1} {'type': 'loss', 'content': 0.009760797023773193, 'timestamp': '2025-09-10 02:17:55.785672', 'step': 984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:17:55.816268', 'step': 984, 'epoch': 1} {'type': 'loss', 'content': 0.010807998478412628, 'timestamp': '2025-09-10 02:17:55.819473', 'step': 985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:17:55.853423', 'step': 985, 'epoch': 1} {'type': 'loss', 'content': 0.024396957829594612, 'timestamp': '2025-09-10 02:17:55.856074', 'step': 986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:55.887069', 'step': 986, 'epoch': 1} {'type': 'loss', 'content': 0.02711336500942707, 'timestamp': '2025-09-10 02:17:55.894496', 'step': 987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:17:55.931416', 'step': 987, 'epoch': 1} {'type': 'loss', 'content': 0.006434720940887928, 'timestamp': '2025-09-10 02:17:55.965598', 'step': 988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:56.000656', 'step': 988, 'epoch': 1} {'type': 'loss', 'content': 0.02933250367641449, 'timestamp': '2025-09-10 02:17:56.002861', 'step': 989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:56.035652', 'step': 989, 'epoch': 1} {'type': 'loss', 'content': 0.04050236940383911, 'timestamp': '2025-09-10 02:17:56.047816', 'step': 990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:17:56.083529', 'step': 990, 'epoch': 1} {'type': 'loss', 'content': 0.03573581948876381, 'timestamp': '2025-09-10 02:17:56.090451', 'step': 991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:56.137426', 'step': 991, 'epoch': 1} {'type': 'loss', 'content': 0.013186642900109291, 'timestamp': '2025-09-10 02:17:56.165631', 'step': 992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:17:56.198381', 'step': 992, 'epoch': 1} {'type': 'loss', 'content': 0.00814458541572094, 'timestamp': '2025-09-10 02:17:56.202916', 'step': 993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:17:56.235284', 'step': 993, 'epoch': 1} {'type': 'loss', 'content': 0.04362935200333595, 'timestamp': '2025-09-10 02:17:56.239732', 'step': 994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:56.272860', 'step': 994, 'epoch': 1} {'type': 'loss', 'content': 0.005004457198083401, 'timestamp': '2025-09-10 02:17:56.280773', 'step': 995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:17:56.312241', 'step': 995, 'epoch': 1} {'type': 'loss', 'content': 0.026401042938232422, 'timestamp': '2025-09-10 02:17:56.340827', 'step': 996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:17:56.371718', 'step': 996, 'epoch': 1} {'type': 'loss', 'content': 0.014147581532597542, 'timestamp': '2025-09-10 02:17:56.381443', 'step': 997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:56.411960', 'step': 997, 'epoch': 1} {'type': 'loss', 'content': 0.005374276544898748, 'timestamp': '2025-09-10 02:17:56.419394', 'step': 998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:17:56.450530', 'step': 998, 'epoch': 1} {'type': 'loss', 'content': 0.00914605613797903, 'timestamp': '2025-09-10 02:17:56.457781', 'step': 999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:17:56.489128', 'step': 999, 'epoch': 1} {'type': 'loss', 'content': 0.01612034998834133, 'timestamp': '2025-09-10 02:17:56.522591', 'step': 1000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1000', 'timestamp': '2025-09-10 02:18:01.459465', 'step': 1000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:01.491383', 'step': 1000, 'epoch': 1} {'type': 'loss', 'content': 0.0174104031175375, 'timestamp': '2025-09-10 02:18:01.494305', 'step': 1001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:01.528010', 'step': 1001, 'epoch': 1} {'type': 'loss', 'content': 0.026182083413004875, 'timestamp': '2025-09-10 02:18:01.539594', 'step': 1002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:01.570881', 'step': 1002, 'epoch': 1} {'type': 'loss', 'content': 0.03149298205971718, 'timestamp': '2025-09-10 02:18:01.574600', 'step': 1003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:01.606718', 'step': 1003, 'epoch': 1} {'type': 'loss', 'content': 0.02260902337729931, 'timestamp': '2025-09-10 02:18:01.637592', 'step': 1004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:01.668452', 'step': 1004, 'epoch': 1} {'type': 'loss', 'content': 0.009301579557359219, 'timestamp': '2025-09-10 02:18:01.676113', 'step': 1005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:01.707010', 'step': 1005, 'epoch': 1} {'type': 'loss', 'content': 0.017554203048348427, 'timestamp': '2025-09-10 02:18:01.714003', 'step': 1006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:01.744576', 'step': 1006, 'epoch': 1} {'type': 'loss', 'content': 0.02697034180164337, 'timestamp': '2025-09-10 02:18:01.748615', 'step': 1007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:01.778798', 'step': 1007, 'epoch': 1} {'type': 'loss', 'content': 0.013858512975275517, 'timestamp': '2025-09-10 02:18:01.806623', 'step': 1008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:01.837166', 'step': 1008, 'epoch': 1} {'type': 'loss', 'content': 0.01846943609416485, 'timestamp': '2025-09-10 02:18:01.842178', 'step': 1009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:01.872899', 'step': 1009, 'epoch': 1} {'type': 'loss', 'content': 0.028651878237724304, 'timestamp': '2025-09-10 02:18:01.879770', 'step': 1010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:01.910494', 'step': 1010, 'epoch': 1} {'type': 'loss', 'content': 0.015424097888171673, 'timestamp': '2025-09-10 02:18:01.920098', 'step': 1011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:01.954475', 'step': 1011, 'epoch': 1} {'type': 'loss', 'content': 0.03452470153570175, 'timestamp': '2025-09-10 02:18:01.979541', 'step': 1012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:02.009239', 'step': 1012, 'epoch': 1} {'type': 'loss', 'content': 0.01232621818780899, 'timestamp': '2025-09-10 02:18:02.011397', 'step': 1013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:02.040787', 'step': 1013, 'epoch': 1} {'type': 'loss', 'content': 0.006808358710259199, 'timestamp': '2025-09-10 02:18:02.045323', 'step': 1014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:02.076071', 'step': 1014, 'epoch': 1} {'type': 'loss', 'content': 0.011624851264059544, 'timestamp': '2025-09-10 02:18:02.086817', 'step': 1015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:02.117025', 'step': 1015, 'epoch': 1} {'type': 'loss', 'content': 0.024632567539811134, 'timestamp': '2025-09-10 02:18:02.145882', 'step': 1016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:02.176481', 'step': 1016, 'epoch': 1} {'type': 'loss', 'content': 0.017971431836485863, 'timestamp': '2025-09-10 02:18:02.181117', 'step': 1017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:02.214734', 'step': 1017, 'epoch': 1} {'type': 'loss', 'content': 0.0073992046527564526, 'timestamp': '2025-09-10 02:18:02.225624', 'step': 1018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:18:02.263494', 'step': 1018, 'epoch': 1} {'type': 'loss', 'content': 0.01738920249044895, 'timestamp': '2025-09-10 02:18:02.279509', 'step': 1019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:02.310208', 'step': 1019, 'epoch': 1} {'type': 'loss', 'content': 0.010011442936956882, 'timestamp': '2025-09-10 02:18:02.333868', 'step': 1020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:18:02.363806', 'step': 1020, 'epoch': 1} {'type': 'loss', 'content': 0.01954047754406929, 'timestamp': '2025-09-10 02:18:02.366349', 'step': 1021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:02.396560', 'step': 1021, 'epoch': 1} {'type': 'loss', 'content': 0.021268155425786972, 'timestamp': '2025-09-10 02:18:02.403748', 'step': 1022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:02.434350', 'step': 1022, 'epoch': 1} {'type': 'loss', 'content': 0.023624001070857048, 'timestamp': '2025-09-10 02:18:02.446757', 'step': 1023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:18:02.487085', 'step': 1023, 'epoch': 1} {'type': 'loss', 'content': 0.025291163474321365, 'timestamp': '2025-09-10 02:18:02.517046', 'step': 1024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:18:02.552710', 'step': 1024, 'epoch': 1} {'type': 'loss', 'content': 0.014068282209336758, 'timestamp': '2025-09-10 02:18:02.568165', 'step': 1025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:02.602462', 'step': 1025, 'epoch': 1} {'type': 'loss', 'content': 0.01854473166167736, 'timestamp': '2025-09-10 02:18:02.615856', 'step': 1026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:02.648053', 'step': 1026, 'epoch': 1} {'type': 'loss', 'content': 0.007462997920811176, 'timestamp': '2025-09-10 02:18:02.660020', 'step': 1027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:18:02.695010', 'step': 1027, 'epoch': 1} {'type': 'loss', 'content': 0.008165022358298302, 'timestamp': '2025-09-10 02:18:02.729907', 'step': 1028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:02.759893', 'step': 1028, 'epoch': 1} {'type': 'loss', 'content': 0.00575115904211998, 'timestamp': '2025-09-10 02:18:02.768441', 'step': 1029, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:18:12.958862', 'step': 1029, 'epoch': 1} {'type': 'pplx', 'content': 12598956.534986155, 'timestamp': '2025-09-10 02:18:12.961843', 'step': 1029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:12.993434', 'step': 1029, 'epoch': 1} {'type': 'loss', 'content': 0.025716153904795647, 'timestamp': '2025-09-10 02:18:13.001709', 'step': 1030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:13.032761', 'step': 1030, 'epoch': 1} {'type': 'loss', 'content': 0.011754123494029045, 'timestamp': '2025-09-10 02:18:13.037008', 'step': 1031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:13.067369', 'step': 1031, 'epoch': 1} {'type': 'loss', 'content': 0.01727590523660183, 'timestamp': '2025-09-10 02:18:13.091208', 'step': 1032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:13.122124', 'step': 1032, 'epoch': 1} {'type': 'loss', 'content': 0.014728769659996033, 'timestamp': '2025-09-10 02:18:13.126760', 'step': 1033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:13.157423', 'step': 1033, 'epoch': 1} {'type': 'loss', 'content': 0.01519166398793459, 'timestamp': '2025-09-10 02:18:13.160059', 'step': 1034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:13.190660', 'step': 1034, 'epoch': 1} {'type': 'loss', 'content': 0.003654760541394353, 'timestamp': '2025-09-10 02:18:13.198324', 'step': 1035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:13.229987', 'step': 1035, 'epoch': 1} {'type': 'loss', 'content': 0.013432272709906101, 'timestamp': '2025-09-10 02:18:13.257838', 'step': 1036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:13.289765', 'step': 1036, 'epoch': 1} {'type': 'loss', 'content': 0.017557019367814064, 'timestamp': '2025-09-10 02:18:13.300314', 'step': 1037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:13.331706', 'step': 1037, 'epoch': 1} {'type': 'loss', 'content': 0.008299489505589008, 'timestamp': '2025-09-10 02:18:13.335575', 'step': 1038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:13.366588', 'step': 1038, 'epoch': 1} {'type': 'loss', 'content': 0.008687845431268215, 'timestamp': '2025-09-10 02:18:13.371045', 'step': 1039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:13.402670', 'step': 1039, 'epoch': 1} {'type': 'loss', 'content': 0.023219764232635498, 'timestamp': '2025-09-10 02:18:13.435613', 'step': 1040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:18:13.469380', 'step': 1040, 'epoch': 1} {'type': 'loss', 'content': 0.007147731725126505, 'timestamp': '2025-09-10 02:18:13.482656', 'step': 1041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:13.515176', 'step': 1041, 'epoch': 1} {'type': 'loss', 'content': 0.025176668539643288, 'timestamp': '2025-09-10 02:18:13.525281', 'step': 1042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:13.556759', 'step': 1042, 'epoch': 1} {'type': 'loss', 'content': 0.030310701578855515, 'timestamp': '2025-09-10 02:18:13.564096', 'step': 1043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:13.595495', 'step': 1043, 'epoch': 1} {'type': 'loss', 'content': 0.004898954648524523, 'timestamp': '2025-09-10 02:18:13.623811', 'step': 1044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:13.654800', 'step': 1044, 'epoch': 1} {'type': 'loss', 'content': 0.0060010491870343685, 'timestamp': '2025-09-10 02:18:13.664468', 'step': 1045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:18:13.704827', 'step': 1045, 'epoch': 1} {'type': 'loss', 'content': 0.00663131894543767, 'timestamp': '2025-09-10 02:18:13.721022', 'step': 1046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:13.752850', 'step': 1046, 'epoch': 1} {'type': 'loss', 'content': 0.004566808696836233, 'timestamp': '2025-09-10 02:18:13.760384', 'step': 1047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:13.791912', 'step': 1047, 'epoch': 1} {'type': 'loss', 'content': 0.034333836287260056, 'timestamp': '2025-09-10 02:18:13.819760', 'step': 1048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:13.853412', 'step': 1048, 'epoch': 1} {'type': 'loss', 'content': 0.02081063576042652, 'timestamp': '2025-09-10 02:18:13.863158', 'step': 1049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:13.895760', 'step': 1049, 'epoch': 1} {'type': 'loss', 'content': 0.02265256643295288, 'timestamp': '2025-09-10 02:18:13.903213', 'step': 1050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:13.936643', 'step': 1050, 'epoch': 1} {'type': 'loss', 'content': 0.023109683766961098, 'timestamp': '2025-09-10 02:18:13.946822', 'step': 1051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:13.981992', 'step': 1051, 'epoch': 1} {'type': 'loss', 'content': 0.008987885899841785, 'timestamp': '2025-09-10 02:18:14.016298', 'step': 1052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:14.049436', 'step': 1052, 'epoch': 1} {'type': 'loss', 'content': 0.015795622020959854, 'timestamp': '2025-09-10 02:18:14.051658', 'step': 1053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:14.086452', 'step': 1053, 'epoch': 1} {'type': 'loss', 'content': 0.0090614790096879, 'timestamp': '2025-09-10 02:18:14.099780', 'step': 1054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:14.131728', 'step': 1054, 'epoch': 1} {'type': 'loss', 'content': 0.0187073964625597, 'timestamp': '2025-09-10 02:18:14.141814', 'step': 1055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:14.174157', 'step': 1055, 'epoch': 1} {'type': 'loss', 'content': 0.02702743373811245, 'timestamp': '2025-09-10 02:18:14.202776', 'step': 1056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:14.233455', 'step': 1056, 'epoch': 1} {'type': 'loss', 'content': 0.02460920810699463, 'timestamp': '2025-09-10 02:18:14.239029', 'step': 1057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:14.270798', 'step': 1057, 'epoch': 1} {'type': 'loss', 'content': 0.009660584852099419, 'timestamp': '2025-09-10 02:18:14.278697', 'step': 1058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:14.310361', 'step': 1058, 'epoch': 1} {'type': 'loss', 'content': 0.020776310935616493, 'timestamp': '2025-09-10 02:18:14.318047', 'step': 1059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:14.349936', 'step': 1059, 'epoch': 1} {'type': 'loss', 'content': 0.02294449508190155, 'timestamp': '2025-09-10 02:18:14.378601', 'step': 1060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:14.410728', 'step': 1060, 'epoch': 1} {'type': 'loss', 'content': 0.017590373754501343, 'timestamp': '2025-09-10 02:18:14.413245', 'step': 1061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:18:14.448846', 'step': 1061, 'epoch': 1} {'type': 'loss', 'content': 0.006449908018112183, 'timestamp': '2025-09-10 02:18:14.462548', 'step': 1062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:14.494747', 'step': 1062, 'epoch': 1} {'type': 'loss', 'content': 0.03565583750605583, 'timestamp': '2025-09-10 02:18:14.505002', 'step': 1063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:18:14.547850', 'step': 1063, 'epoch': 1} {'type': 'loss', 'content': 0.0327337309718132, 'timestamp': '2025-09-10 02:18:14.586097', 'step': 1064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:14.617996', 'step': 1064, 'epoch': 1} {'type': 'loss', 'content': 0.02565723843872547, 'timestamp': '2025-09-10 02:18:14.622747', 'step': 1065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:14.654346', 'step': 1065, 'epoch': 1} {'type': 'loss', 'content': 0.009299799799919128, 'timestamp': '2025-09-10 02:18:14.661114', 'step': 1066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:14.692825', 'step': 1066, 'epoch': 1} {'type': 'loss', 'content': 0.014539425261318684, 'timestamp': '2025-09-10 02:18:14.699563', 'step': 1067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:14.731919', 'step': 1067, 'epoch': 1} {'type': 'loss', 'content': 0.0058663212694227695, 'timestamp': '2025-09-10 02:18:14.762906', 'step': 1068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:14.797242', 'step': 1068, 'epoch': 1} {'type': 'loss', 'content': 0.013086764141917229, 'timestamp': '2025-09-10 02:18:14.802463', 'step': 1069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:14.833918', 'step': 1069, 'epoch': 1} {'type': 'loss', 'content': 0.006901100277900696, 'timestamp': '2025-09-10 02:18:14.841018', 'step': 1070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:14.871397', 'step': 1070, 'epoch': 1} {'type': 'loss', 'content': 0.03987196460366249, 'timestamp': '2025-09-10 02:18:14.881603', 'step': 1071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:14.913411', 'step': 1071, 'epoch': 1} {'type': 'loss', 'content': 0.019523393362760544, 'timestamp': '2025-09-10 02:18:14.941939', 'step': 1072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:14.972877', 'step': 1072, 'epoch': 1} {'type': 'loss', 'content': 0.008477416820824146, 'timestamp': '2025-09-10 02:18:14.978088', 'step': 1073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:15.009439', 'step': 1073, 'epoch': 1} {'type': 'loss', 'content': 0.00965914037078619, 'timestamp': '2025-09-10 02:18:15.016235', 'step': 1074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:15.047147', 'step': 1074, 'epoch': 1} {'type': 'loss', 'content': 0.030519306659698486, 'timestamp': '2025-09-10 02:18:15.054225', 'step': 1075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:15.084514', 'step': 1075, 'epoch': 1} {'type': 'loss', 'content': 0.006563273724168539, 'timestamp': '2025-09-10 02:18:15.109753', 'step': 1076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:15.140018', 'step': 1076, 'epoch': 1} {'type': 'loss', 'content': 0.0034011027310043573, 'timestamp': '2025-09-10 02:18:15.142293', 'step': 1077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:15.173398', 'step': 1077, 'epoch': 1} {'type': 'loss', 'content': 0.00994145218282938, 'timestamp': '2025-09-10 02:18:15.180346', 'step': 1078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:15.211068', 'step': 1078, 'epoch': 1} {'type': 'loss', 'content': 0.00793201569467783, 'timestamp': '2025-09-10 02:18:15.218799', 'step': 1079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:15.249649', 'step': 1079, 'epoch': 1} {'type': 'loss', 'content': 0.006165057886391878, 'timestamp': '2025-09-10 02:18:15.278386', 'step': 1080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:15.309899', 'step': 1080, 'epoch': 1} {'type': 'loss', 'content': 0.017197439447045326, 'timestamp': '2025-09-10 02:18:15.314546', 'step': 1081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:18:15.348968', 'step': 1081, 'epoch': 1} {'type': 'loss', 'content': 0.011048262938857079, 'timestamp': '2025-09-10 02:18:15.362821', 'step': 1082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:15.395062', 'step': 1082, 'epoch': 1} {'type': 'loss', 'content': 0.016790146008133888, 'timestamp': '2025-09-10 02:18:15.402803', 'step': 1083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:15.434194', 'step': 1083, 'epoch': 1} {'type': 'loss', 'content': 0.013046172447502613, 'timestamp': '2025-09-10 02:18:15.462016', 'step': 1084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:15.493866', 'step': 1084, 'epoch': 1} {'type': 'loss', 'content': 0.015779945999383926, 'timestamp': '2025-09-10 02:18:15.501637', 'step': 1085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:15.533252', 'step': 1085, 'epoch': 1} {'type': 'loss', 'content': 0.021666022017598152, 'timestamp': '2025-09-10 02:18:15.540188', 'step': 1086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:15.571241', 'step': 1086, 'epoch': 1} {'type': 'loss', 'content': 0.007535271812230349, 'timestamp': '2025-09-10 02:18:15.581785', 'step': 1087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:15.614685', 'step': 1087, 'epoch': 1} {'type': 'loss', 'content': 0.012627107091248035, 'timestamp': '2025-09-10 02:18:15.645687', 'step': 1088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:15.676355', 'step': 1088, 'epoch': 1} {'type': 'loss', 'content': 0.021864308044314384, 'timestamp': '2025-09-10 02:18:15.681456', 'step': 1089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:15.713779', 'step': 1089, 'epoch': 1} {'type': 'loss', 'content': 0.008793273940682411, 'timestamp': '2025-09-10 02:18:15.724719', 'step': 1090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:15.756581', 'step': 1090, 'epoch': 1} {'type': 'loss', 'content': 0.0030675516463816166, 'timestamp': '2025-09-10 02:18:15.763522', 'step': 1091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:15.794568', 'step': 1091, 'epoch': 1} {'type': 'loss', 'content': 0.009418687783181667, 'timestamp': '2025-09-10 02:18:15.826297', 'step': 1092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:15.859361', 'step': 1092, 'epoch': 1} {'type': 'loss', 'content': 0.005299612879753113, 'timestamp': '2025-09-10 02:18:15.869132', 'step': 1093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:15.899394', 'step': 1093, 'epoch': 1} {'type': 'loss', 'content': 0.018485212698578835, 'timestamp': '2025-09-10 02:18:15.906197', 'step': 1094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:15.936910', 'step': 1094, 'epoch': 1} {'type': 'loss', 'content': 0.019424965605139732, 'timestamp': '2025-09-10 02:18:15.947055', 'step': 1095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:15.978515', 'step': 1095, 'epoch': 1} {'type': 'loss', 'content': 0.012880226597189903, 'timestamp': '2025-09-10 02:18:16.003377', 'step': 1096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:16.034727', 'step': 1096, 'epoch': 1} {'type': 'loss', 'content': 0.007060025352984667, 'timestamp': '2025-09-10 02:18:16.038943', 'step': 1097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:16.069518', 'step': 1097, 'epoch': 1} {'type': 'loss', 'content': 0.0054059443064033985, 'timestamp': '2025-09-10 02:18:16.082033', 'step': 1098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:18:16.122331', 'step': 1098, 'epoch': 1} {'type': 'loss', 'content': 0.018658744171261787, 'timestamp': '2025-09-10 02:18:16.138003', 'step': 1099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:16.168621', 'step': 1099, 'epoch': 1} {'type': 'loss', 'content': 0.003897774498909712, 'timestamp': '2025-09-10 02:18:16.193726', 'step': 1100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:16.224671', 'step': 1100, 'epoch': 1} {'type': 'loss', 'content': 0.009347101673483849, 'timestamp': '2025-09-10 02:18:16.230213', 'step': 1101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:16.260845', 'step': 1101, 'epoch': 1} {'type': 'loss', 'content': 0.03616241365671158, 'timestamp': '2025-09-10 02:18:16.264931', 'step': 1102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:16.298967', 'step': 1102, 'epoch': 1} {'type': 'loss', 'content': 0.006681836675852537, 'timestamp': '2025-09-10 02:18:16.312335', 'step': 1103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:16.343253', 'step': 1103, 'epoch': 1} {'type': 'loss', 'content': 0.008854770101606846, 'timestamp': '2025-09-10 02:18:16.374312', 'step': 1104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:16.405424', 'step': 1104, 'epoch': 1} {'type': 'loss', 'content': 0.004546549171209335, 'timestamp': '2025-09-10 02:18:16.407728', 'step': 1105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:16.437927', 'step': 1105, 'epoch': 1} {'type': 'loss', 'content': 0.010041974484920502, 'timestamp': '2025-09-10 02:18:16.440422', 'step': 1106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:18:16.474803', 'step': 1106, 'epoch': 1} {'type': 'loss', 'content': 0.0157835241407156, 'timestamp': '2025-09-10 02:18:16.488466', 'step': 1107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:16.520358', 'step': 1107, 'epoch': 1} {'type': 'loss', 'content': 0.0020716842263936996, 'timestamp': '2025-09-10 02:18:16.553346', 'step': 1108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:16.584568', 'step': 1108, 'epoch': 1} {'type': 'loss', 'content': 0.029057949781417847, 'timestamp': '2025-09-10 02:18:16.588795', 'step': 1109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:16.621696', 'step': 1109, 'epoch': 1} {'type': 'loss', 'content': 0.02135612629354, 'timestamp': '2025-09-10 02:18:16.629582', 'step': 1110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:16.660800', 'step': 1110, 'epoch': 1} {'type': 'loss', 'content': 0.0028702733106911182, 'timestamp': '2025-09-10 02:18:16.665292', 'step': 1111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:16.695943', 'step': 1111, 'epoch': 1} {'type': 'loss', 'content': 0.013597295619547367, 'timestamp': '2025-09-10 02:18:16.720928', 'step': 1112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:16.751987', 'step': 1112, 'epoch': 1} {'type': 'loss', 'content': 0.0027478632982820272, 'timestamp': '2025-09-10 02:18:16.756533', 'step': 1113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:16.787835', 'step': 1113, 'epoch': 1} {'type': 'loss', 'content': 0.0023722779005765915, 'timestamp': '2025-09-10 02:18:16.794785', 'step': 1114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:16.826397', 'step': 1114, 'epoch': 1} {'type': 'loss', 'content': 0.014487197622656822, 'timestamp': '2025-09-10 02:18:16.833719', 'step': 1115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:16.866648', 'step': 1115, 'epoch': 1} {'type': 'loss', 'content': 0.015417618677020073, 'timestamp': '2025-09-10 02:18:16.895241', 'step': 1116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:16.927334', 'step': 1116, 'epoch': 1} {'type': 'loss', 'content': 0.005695224739611149, 'timestamp': '2025-09-10 02:18:16.929586', 'step': 1117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:16.959925', 'step': 1117, 'epoch': 1} {'type': 'loss', 'content': 0.04347721487283707, 'timestamp': '2025-09-10 02:18:16.964520', 'step': 1118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:16.998114', 'step': 1118, 'epoch': 1} {'type': 'loss', 'content': 0.010189319029450417, 'timestamp': '2025-09-10 02:18:17.002800', 'step': 1119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:17.034445', 'step': 1119, 'epoch': 1} {'type': 'loss', 'content': 0.020940367132425308, 'timestamp': '2025-09-10 02:18:17.063030', 'step': 1120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:17.094110', 'step': 1120, 'epoch': 1} {'type': 'loss', 'content': 0.004970818292349577, 'timestamp': '2025-09-10 02:18:17.098676', 'step': 1121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:17.131615', 'step': 1121, 'epoch': 1} {'type': 'loss', 'content': 0.004925449378788471, 'timestamp': '2025-09-10 02:18:17.135945', 'step': 1122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:17.167238', 'step': 1122, 'epoch': 1} {'type': 'loss', 'content': 0.0076041617430746555, 'timestamp': '2025-09-10 02:18:17.174914', 'step': 1123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:17.205923', 'step': 1123, 'epoch': 1} {'type': 'loss', 'content': 0.004425295628607273, 'timestamp': '2025-09-10 02:18:17.234476', 'step': 1124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:17.266629', 'step': 1124, 'epoch': 1} {'type': 'loss', 'content': 0.022850140929222107, 'timestamp': '2025-09-10 02:18:17.279343', 'step': 1125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:17.310177', 'step': 1125, 'epoch': 1} {'type': 'loss', 'content': 0.008794148452579975, 'timestamp': '2025-09-10 02:18:17.317476', 'step': 1126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:17.347814', 'step': 1126, 'epoch': 1} {'type': 'loss', 'content': 0.02646883763372898, 'timestamp': '2025-09-10 02:18:17.351956', 'step': 1127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:17.382198', 'step': 1127, 'epoch': 1} {'type': 'loss', 'content': 0.015476626344025135, 'timestamp': '2025-09-10 02:18:17.410749', 'step': 1128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:17.443000', 'step': 1128, 'epoch': 1} {'type': 'loss', 'content': 0.0023650110233575106, 'timestamp': '2025-09-10 02:18:17.455617', 'step': 1129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:17.487098', 'step': 1129, 'epoch': 1} {'type': 'loss', 'content': 0.0014028213918209076, 'timestamp': '2025-09-10 02:18:17.494611', 'step': 1130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:17.525210', 'step': 1130, 'epoch': 1} {'type': 'loss', 'content': 0.006255102809518576, 'timestamp': '2025-09-10 02:18:17.535283', 'step': 1131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:17.566091', 'step': 1131, 'epoch': 1} {'type': 'loss', 'content': 0.007090611848980188, 'timestamp': '2025-09-10 02:18:17.591020', 'step': 1132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:17.622925', 'step': 1132, 'epoch': 1} {'type': 'loss', 'content': 0.015480038709938526, 'timestamp': '2025-09-10 02:18:17.627184', 'step': 1133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:17.658081', 'step': 1133, 'epoch': 1} {'type': 'loss', 'content': 0.004326352383941412, 'timestamp': '2025-09-10 02:18:17.665220', 'step': 1134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:17.696819', 'step': 1134, 'epoch': 1} {'type': 'loss', 'content': 0.02047579549252987, 'timestamp': '2025-09-10 02:18:17.703921', 'step': 1135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:17.736305', 'step': 1135, 'epoch': 1} {'type': 'loss', 'content': 0.0026631599757820368, 'timestamp': '2025-09-10 02:18:17.764785', 'step': 1136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:18:17.800907', 'step': 1136, 'epoch': 1} {'type': 'loss', 'content': 0.013182473368942738, 'timestamp': '2025-09-10 02:18:17.816089', 'step': 1137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:17.847937', 'step': 1137, 'epoch': 1} {'type': 'loss', 'content': 0.010062654502689838, 'timestamp': '2025-09-10 02:18:17.852496', 'step': 1138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:18:17.886744', 'step': 1138, 'epoch': 1} {'type': 'loss', 'content': 0.003278909483924508, 'timestamp': '2025-09-10 02:18:17.900412', 'step': 1139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:17.930709', 'step': 1139, 'epoch': 1} {'type': 'loss', 'content': 0.0194843877106905, 'timestamp': '2025-09-10 02:18:17.954539', 'step': 1140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:17.986665', 'step': 1140, 'epoch': 1} {'type': 'loss', 'content': 0.002890173811465502, 'timestamp': '2025-09-10 02:18:17.992197', 'step': 1141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:18.024715', 'step': 1141, 'epoch': 1} {'type': 'loss', 'content': 0.004366376902908087, 'timestamp': '2025-09-10 02:18:18.035599', 'step': 1142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:18.066749', 'step': 1142, 'epoch': 1} {'type': 'loss', 'content': 0.005158752668648958, 'timestamp': '2025-09-10 02:18:18.074300', 'step': 1143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:18.108596', 'step': 1143, 'epoch': 1} {'type': 'loss', 'content': 0.009630167856812477, 'timestamp': '2025-09-10 02:18:18.137319', 'step': 1144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:18.183432', 'step': 1144, 'epoch': 1} {'type': 'loss', 'content': 0.012025467120110989, 'timestamp': '2025-09-10 02:18:18.187852', 'step': 1145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:18.219751', 'step': 1145, 'epoch': 1} {'type': 'loss', 'content': 0.0020432344172149897, 'timestamp': '2025-09-10 02:18:18.222161', 'step': 1146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:18.253028', 'step': 1146, 'epoch': 1} {'type': 'loss', 'content': 0.006666641216725111, 'timestamp': '2025-09-10 02:18:18.260029', 'step': 1147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:18.290912', 'step': 1147, 'epoch': 1} {'type': 'loss', 'content': 0.007748906966298819, 'timestamp': '2025-09-10 02:18:18.316141', 'step': 1148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:18.348874', 'step': 1148, 'epoch': 1} {'type': 'loss', 'content': 0.02841871976852417, 'timestamp': '2025-09-10 02:18:18.354345', 'step': 1149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:18.385509', 'step': 1149, 'epoch': 1} {'type': 'loss', 'content': 0.0032413543667644262, 'timestamp': '2025-09-10 02:18:18.393360', 'step': 1150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:18.423775', 'step': 1150, 'epoch': 1} {'type': 'loss', 'content': 0.010790413245558739, 'timestamp': '2025-09-10 02:18:18.431200', 'step': 1151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:18.461383', 'step': 1151, 'epoch': 1} {'type': 'loss', 'content': 0.017973562702536583, 'timestamp': '2025-09-10 02:18:18.494542', 'step': 1152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:18.525330', 'step': 1152, 'epoch': 1} {'type': 'loss', 'content': 0.002686847234144807, 'timestamp': '2025-09-10 02:18:18.529764', 'step': 1153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:18.562080', 'step': 1153, 'epoch': 1} {'type': 'loss', 'content': 0.002674214309081435, 'timestamp': '2025-09-10 02:18:18.567873', 'step': 1154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:18.599988', 'step': 1154, 'epoch': 1} {'type': 'loss', 'content': 0.0012537644943222404, 'timestamp': '2025-09-10 02:18:18.608841', 'step': 1155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:18.642028', 'step': 1155, 'epoch': 1} {'type': 'loss', 'content': 0.011615641415119171, 'timestamp': '2025-09-10 02:18:18.669363', 'step': 1156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:18.701750', 'step': 1156, 'epoch': 1} {'type': 'loss', 'content': 0.01832910068333149, 'timestamp': '2025-09-10 02:18:18.703887', 'step': 1157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:18.735275', 'step': 1157, 'epoch': 1} {'type': 'loss', 'content': 0.006739361677318811, 'timestamp': '2025-09-10 02:18:18.741931', 'step': 1158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:18.772936', 'step': 1158, 'epoch': 1} {'type': 'loss', 'content': 0.01089425478130579, 'timestamp': '2025-09-10 02:18:18.779689', 'step': 1159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:18.810853', 'step': 1159, 'epoch': 1} {'type': 'loss', 'content': 0.00913853757083416, 'timestamp': '2025-09-10 02:18:18.838352', 'step': 1160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:18.874197', 'step': 1160, 'epoch': 1} {'type': 'loss', 'content': 0.014518055133521557, 'timestamp': '2025-09-10 02:18:18.881380', 'step': 1161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:18.913054', 'step': 1161, 'epoch': 1} {'type': 'loss', 'content': 0.006596317049115896, 'timestamp': '2025-09-10 02:18:18.920645', 'step': 1162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:18.952024', 'step': 1162, 'epoch': 1} {'type': 'loss', 'content': 0.014293434098362923, 'timestamp': '2025-09-10 02:18:18.959596', 'step': 1163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:18.991733', 'step': 1163, 'epoch': 1} {'type': 'loss', 'content': 0.010215037502348423, 'timestamp': '2025-09-10 02:18:19.016672', 'step': 1164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:19.048796', 'step': 1164, 'epoch': 1} {'type': 'loss', 'content': 0.030613288283348083, 'timestamp': '2025-09-10 02:18:19.053700', 'step': 1165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:19.085341', 'step': 1165, 'epoch': 1} {'type': 'loss', 'content': 0.0022717637475579977, 'timestamp': '2025-09-10 02:18:19.089396', 'step': 1166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:19.120100', 'step': 1166, 'epoch': 1} {'type': 'loss', 'content': 0.009593302384018898, 'timestamp': '2025-09-10 02:18:19.127529', 'step': 1167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:19.159440', 'step': 1167, 'epoch': 1} {'type': 'loss', 'content': 0.006897877436131239, 'timestamp': '2025-09-10 02:18:19.190357', 'step': 1168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:18:19.227629', 'step': 1168, 'epoch': 1} {'type': 'loss', 'content': 0.06167227774858475, 'timestamp': '2025-09-10 02:18:19.242984', 'step': 1169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:19.274562', 'step': 1169, 'epoch': 1} {'type': 'loss', 'content': 0.04564559459686279, 'timestamp': '2025-09-10 02:18:19.278259', 'step': 1170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:19.310480', 'step': 1170, 'epoch': 1} {'type': 'loss', 'content': 0.0009320880053564906, 'timestamp': '2025-09-10 02:18:19.317812', 'step': 1171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:19.350416', 'step': 1171, 'epoch': 1} {'type': 'loss', 'content': 0.002427774015814066, 'timestamp': '2025-09-10 02:18:19.382777', 'step': 1172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:19.416252', 'step': 1172, 'epoch': 1} {'type': 'loss', 'content': 0.010821384377777576, 'timestamp': '2025-09-10 02:18:19.424201', 'step': 1173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:19.458546', 'step': 1173, 'epoch': 1} {'type': 'loss', 'content': 0.025808248668909073, 'timestamp': '2025-09-10 02:18:19.462683', 'step': 1174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:18:19.504314', 'step': 1174, 'epoch': 1} {'type': 'loss', 'content': 0.061185259371995926, 'timestamp': '2025-09-10 02:18:19.520458', 'step': 1175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:19.551166', 'step': 1175, 'epoch': 1} {'type': 'loss', 'content': 0.0003842521400656551, 'timestamp': '2025-09-10 02:18:19.582991', 'step': 1176, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:18:29.776063', 'step': 1176, 'epoch': 1} {'type': 'pplx', 'content': 16784163.124731667, 'timestamp': '2025-09-10 02:18:29.779308', 'step': 1176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:29.810633', 'step': 1176, 'epoch': 1} {'type': 'loss', 'content': 0.017028305679559708, 'timestamp': '2025-09-10 02:18:29.818914', 'step': 1177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:29.850203', 'step': 1177, 'epoch': 1} {'type': 'loss', 'content': 0.03579110652208328, 'timestamp': '2025-09-10 02:18:29.854250', 'step': 1178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:29.885761', 'step': 1178, 'epoch': 1} {'type': 'loss', 'content': 0.012345547787845135, 'timestamp': '2025-09-10 02:18:29.893048', 'step': 1179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:18:29.923823', 'step': 1179, 'epoch': 1} {'type': 'loss', 'content': 0.014899312518537045, 'timestamp': '2025-09-10 02:18:29.947959', 'step': 1180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:29.978721', 'step': 1180, 'epoch': 1} {'type': 'loss', 'content': 0.008222085423767567, 'timestamp': '2025-09-10 02:18:29.980848', 'step': 1181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 15187581968384}, 'timestamp': '2025-09-10 02:18:30.023345', 'step': 1181, 'epoch': 1} {'type': 'loss', 'content': 0.0018380869878455997, 'timestamp': '2025-09-10 02:18:30.041100', 'step': 1182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:30.072675', 'step': 1182, 'epoch': 1} {'type': 'loss', 'content': 0.01529020071029663, 'timestamp': '2025-09-10 02:18:30.083507', 'step': 1183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:30.115357', 'step': 1183, 'epoch': 1} {'type': 'loss', 'content': 0.0014780040364712477, 'timestamp': '2025-09-10 02:18:30.139939', 'step': 1184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:30.171392', 'step': 1184, 'epoch': 1} {'type': 'loss', 'content': 0.05387040600180626, 'timestamp': '2025-09-10 02:18:30.175793', 'step': 1185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:30.206809', 'step': 1185, 'epoch': 1} {'type': 'loss', 'content': 0.022361472249031067, 'timestamp': '2025-09-10 02:18:30.213526', 'step': 1186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:30.246344', 'step': 1186, 'epoch': 1} {'type': 'loss', 'content': 0.004187957849353552, 'timestamp': '2025-09-10 02:18:30.257187', 'step': 1187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:30.288529', 'step': 1187, 'epoch': 1} {'type': 'loss', 'content': 0.00469655217602849, 'timestamp': '2025-09-10 02:18:30.311975', 'step': 1188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:30.343483', 'step': 1188, 'epoch': 1} {'type': 'loss', 'content': 0.013641082681715488, 'timestamp': '2025-09-10 02:18:30.347869', 'step': 1189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:30.379141', 'step': 1189, 'epoch': 1} {'type': 'loss', 'content': 0.024424701929092407, 'timestamp': '2025-09-10 02:18:30.386124', 'step': 1190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:30.417664', 'step': 1190, 'epoch': 1} {'type': 'loss', 'content': 0.01394572388380766, 'timestamp': '2025-09-10 02:18:30.429702', 'step': 1191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:30.460633', 'step': 1191, 'epoch': 1} {'type': 'loss', 'content': 0.011202634312212467, 'timestamp': '2025-09-10 02:18:30.488313', 'step': 1192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:30.520611', 'step': 1192, 'epoch': 1} {'type': 'loss', 'content': 0.00934526789933443, 'timestamp': '2025-09-10 02:18:30.533275', 'step': 1193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:30.564516', 'step': 1193, 'epoch': 1} {'type': 'loss', 'content': 0.013034219853579998, 'timestamp': '2025-09-10 02:18:30.572148', 'step': 1194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 624], 'flops': 18509808050496}, 'timestamp': '2025-09-10 02:18:30.626335', 'step': 1194, 'epoch': 1} {'type': 'loss', 'content': 0.016260338947176933, 'timestamp': '2025-09-10 02:18:30.648043', 'step': 1195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:30.679582', 'step': 1195, 'epoch': 1} {'type': 'loss', 'content': 0.01205496210604906, 'timestamp': '2025-09-10 02:18:30.711057', 'step': 1196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:30.742251', 'step': 1196, 'epoch': 1} {'type': 'loss', 'content': 0.008653457276523113, 'timestamp': '2025-09-10 02:18:30.747482', 'step': 1197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:18:30.785544', 'step': 1197, 'epoch': 1} {'type': 'loss', 'content': 0.017093000933527946, 'timestamp': '2025-09-10 02:18:30.801131', 'step': 1198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:30.833495', 'step': 1198, 'epoch': 1} {'type': 'loss', 'content': 0.004972951021045446, 'timestamp': '2025-09-10 02:18:30.840111', 'step': 1199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:30.871337', 'step': 1199, 'epoch': 1} {'type': 'loss', 'content': 0.027272850275039673, 'timestamp': '2025-09-10 02:18:30.896443', 'step': 1200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:30.929351', 'step': 1200, 'epoch': 1} {'type': 'loss', 'content': 0.011625121347606182, 'timestamp': '2025-09-10 02:18:30.942023', 'step': 1201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:18:30.983449', 'step': 1201, 'epoch': 1} {'type': 'loss', 'content': 0.03317030146718025, 'timestamp': '2025-09-10 02:18:31.000458', 'step': 1202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:18:31.036444', 'step': 1202, 'epoch': 1} {'type': 'loss', 'content': 0.002671575639396906, 'timestamp': '2025-09-10 02:18:31.050463', 'step': 1203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:31.083564', 'step': 1203, 'epoch': 1} {'type': 'loss', 'content': 0.009755785576999187, 'timestamp': '2025-09-10 02:18:31.111385', 'step': 1204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:31.149839', 'step': 1204, 'epoch': 1} {'type': 'loss', 'content': 0.03215007483959198, 'timestamp': '2025-09-10 02:18:31.159052', 'step': 1205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:31.196525', 'step': 1205, 'epoch': 1} {'type': 'loss', 'content': 0.010887703858315945, 'timestamp': '2025-09-10 02:18:31.207286', 'step': 1206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:31.251369', 'step': 1206, 'epoch': 1} {'type': 'loss', 'content': 0.020919183269143105, 'timestamp': '2025-09-10 02:18:31.261938', 'step': 1207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:31.299237', 'step': 1207, 'epoch': 1} {'type': 'loss', 'content': 0.04709470644593239, 'timestamp': '2025-09-10 02:18:31.332133', 'step': 1208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:31.374807', 'step': 1208, 'epoch': 1} {'type': 'loss', 'content': 0.01061093620955944, 'timestamp': '2025-09-10 02:18:31.378490', 'step': 1209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:31.415608', 'step': 1209, 'epoch': 1} {'type': 'loss', 'content': 0.029514219611883163, 'timestamp': '2025-09-10 02:18:31.422753', 'step': 1210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:31.455106', 'step': 1210, 'epoch': 1} {'type': 'loss', 'content': 0.012318803928792477, 'timestamp': '2025-09-10 02:18:31.465102', 'step': 1211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:31.497093', 'step': 1211, 'epoch': 1} {'type': 'loss', 'content': 0.018550723791122437, 'timestamp': '2025-09-10 02:18:31.524402', 'step': 1212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:31.557803', 'step': 1212, 'epoch': 1} {'type': 'loss', 'content': 0.008835774846374989, 'timestamp': '2025-09-10 02:18:31.562204', 'step': 1213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:31.595282', 'step': 1213, 'epoch': 1} {'type': 'loss', 'content': 0.01745608262717724, 'timestamp': '2025-09-10 02:18:31.601476', 'step': 1214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:31.633237', 'step': 1214, 'epoch': 1} {'type': 'loss', 'content': 0.011802353896200657, 'timestamp': '2025-09-10 02:18:31.642797', 'step': 1215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:31.674542', 'step': 1215, 'epoch': 1} {'type': 'loss', 'content': 0.01431284286081791, 'timestamp': '2025-09-10 02:18:31.705475', 'step': 1216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:31.737086', 'step': 1216, 'epoch': 1} {'type': 'loss', 'content': 0.025099601596593857, 'timestamp': '2025-09-10 02:18:31.739302', 'step': 1217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:31.770797', 'step': 1217, 'epoch': 1} {'type': 'loss', 'content': 0.02076675556600094, 'timestamp': '2025-09-10 02:18:31.778171', 'step': 1218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:31.809728', 'step': 1218, 'epoch': 1} {'type': 'loss', 'content': 0.01054247748106718, 'timestamp': '2025-09-10 02:18:31.817146', 'step': 1219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:31.848780', 'step': 1219, 'epoch': 1} {'type': 'loss', 'content': 0.00928487628698349, 'timestamp': '2025-09-10 02:18:31.877107', 'step': 1220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:31.907984', 'step': 1220, 'epoch': 1} {'type': 'loss', 'content': 0.022254247218370438, 'timestamp': '2025-09-10 02:18:31.910448', 'step': 1221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:31.941402', 'step': 1221, 'epoch': 1} {'type': 'loss', 'content': 0.011745232157409191, 'timestamp': '2025-09-10 02:18:31.945587', 'step': 1222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:31.976636', 'step': 1222, 'epoch': 1} {'type': 'loss', 'content': 0.02063934877514839, 'timestamp': '2025-09-10 02:18:31.984337', 'step': 1223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:32.016405', 'step': 1223, 'epoch': 1} {'type': 'loss', 'content': 0.039252448827028275, 'timestamp': '2025-09-10 02:18:32.049530', 'step': 1224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:32.082554', 'step': 1224, 'epoch': 1} {'type': 'loss', 'content': 0.012943130917847157, 'timestamp': '2025-09-10 02:18:32.084596', 'step': 1225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:32.116154', 'step': 1225, 'epoch': 1} {'type': 'loss', 'content': 0.02003873698413372, 'timestamp': '2025-09-10 02:18:32.128135', 'step': 1226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:32.159589', 'step': 1226, 'epoch': 1} {'type': 'loss', 'content': 0.02976025640964508, 'timestamp': '2025-09-10 02:18:32.164024', 'step': 1227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:32.194749', 'step': 1227, 'epoch': 1} {'type': 'loss', 'content': 0.012167098931968212, 'timestamp': '2025-09-10 02:18:32.220046', 'step': 1228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:32.251374', 'step': 1228, 'epoch': 1} {'type': 'loss', 'content': 0.021341700106859207, 'timestamp': '2025-09-10 02:18:32.259213', 'step': 1229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:32.289904', 'step': 1229, 'epoch': 1} {'type': 'loss', 'content': 0.005283652804791927, 'timestamp': '2025-09-10 02:18:32.297233', 'step': 1230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:32.329083', 'step': 1230, 'epoch': 1} {'type': 'loss', 'content': 0.01109201367944479, 'timestamp': '2025-09-10 02:18:32.336101', 'step': 1231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:32.367104', 'step': 1231, 'epoch': 1} {'type': 'loss', 'content': 0.023132245987653732, 'timestamp': '2025-09-10 02:18:32.400218', 'step': 1232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:32.430711', 'step': 1232, 'epoch': 1} {'type': 'loss', 'content': 0.010825731791555882, 'timestamp': '2025-09-10 02:18:32.435551', 'step': 1233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:32.466094', 'step': 1233, 'epoch': 1} {'type': 'loss', 'content': 0.02231140062212944, 'timestamp': '2025-09-10 02:18:32.470249', 'step': 1234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:32.502763', 'step': 1234, 'epoch': 1} {'type': 'loss', 'content': 0.029144972562789917, 'timestamp': '2025-09-10 02:18:32.508228', 'step': 1235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:32.539563', 'step': 1235, 'epoch': 1} {'type': 'loss', 'content': 0.005374426953494549, 'timestamp': '2025-09-10 02:18:32.567470', 'step': 1236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:32.605255', 'step': 1236, 'epoch': 1} {'type': 'loss', 'content': 0.02825375273823738, 'timestamp': '2025-09-10 02:18:32.612149', 'step': 1237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:32.645025', 'step': 1237, 'epoch': 1} {'type': 'loss', 'content': 0.020113468170166016, 'timestamp': '2025-09-10 02:18:32.657017', 'step': 1238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:32.690869', 'step': 1238, 'epoch': 1} {'type': 'loss', 'content': 0.018397843465209007, 'timestamp': '2025-09-10 02:18:32.701867', 'step': 1239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:32.735593', 'step': 1239, 'epoch': 1} {'type': 'loss', 'content': 0.009672732092440128, 'timestamp': '2025-09-10 02:18:32.763428', 'step': 1240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:32.794477', 'step': 1240, 'epoch': 1} {'type': 'loss', 'content': 0.015301401726901531, 'timestamp': '2025-09-10 02:18:32.799729', 'step': 1241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:32.832384', 'step': 1241, 'epoch': 1} {'type': 'loss', 'content': 0.003928063903003931, 'timestamp': '2025-09-10 02:18:32.839501', 'step': 1242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:32.871701', 'step': 1242, 'epoch': 1} {'type': 'loss', 'content': 0.011773375794291496, 'timestamp': '2025-09-10 02:18:32.878621', 'step': 1243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:32.918003', 'step': 1243, 'epoch': 1} {'type': 'loss', 'content': 0.0038322594482451677, 'timestamp': '2025-09-10 02:18:32.946709', 'step': 1244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:32.978578', 'step': 1244, 'epoch': 1} {'type': 'loss', 'content': 0.049418624490499496, 'timestamp': '2025-09-10 02:18:32.983159', 'step': 1245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:33.013761', 'step': 1245, 'epoch': 1} {'type': 'loss', 'content': 0.026037881150841713, 'timestamp': '2025-09-10 02:18:33.020631', 'step': 1246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:33.051791', 'step': 1246, 'epoch': 1} {'type': 'loss', 'content': 0.028841393068432808, 'timestamp': '2025-09-10 02:18:33.058811', 'step': 1247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:33.091678', 'step': 1247, 'epoch': 1} {'type': 'loss', 'content': 0.00434449827298522, 'timestamp': '2025-09-10 02:18:33.120341', 'step': 1248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:33.151789', 'step': 1248, 'epoch': 1} {'type': 'loss', 'content': 0.0594901405274868, 'timestamp': '2025-09-10 02:18:33.154354', 'step': 1249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:18:33.193377', 'step': 1249, 'epoch': 1} {'type': 'loss', 'content': 0.02595318667590618, 'timestamp': '2025-09-10 02:18:33.209070', 'step': 1250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:33.241081', 'step': 1250, 'epoch': 1} {'type': 'loss', 'content': 0.009637218900024891, 'timestamp': '2025-09-10 02:18:33.251101', 'step': 1251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:33.282406', 'step': 1251, 'epoch': 1} {'type': 'loss', 'content': 0.020436033606529236, 'timestamp': '2025-09-10 02:18:33.310081', 'step': 1252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:33.342006', 'step': 1252, 'epoch': 1} {'type': 'loss', 'content': 0.014274738729000092, 'timestamp': '2025-09-10 02:18:33.351794', 'step': 1253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:33.383906', 'step': 1253, 'epoch': 1} {'type': 'loss', 'content': 0.01211103331297636, 'timestamp': '2025-09-10 02:18:33.393822', 'step': 1254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:33.427460', 'step': 1254, 'epoch': 1} {'type': 'loss', 'content': 0.0014368664706125855, 'timestamp': '2025-09-10 02:18:33.434463', 'step': 1255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:33.465462', 'step': 1255, 'epoch': 1} {'type': 'loss', 'content': 0.024746278300881386, 'timestamp': '2025-09-10 02:18:33.493327', 'step': 1256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:33.524549', 'step': 1256, 'epoch': 1} {'type': 'loss', 'content': 0.008130094036459923, 'timestamp': '2025-09-10 02:18:33.529833', 'step': 1257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:33.561516', 'step': 1257, 'epoch': 1} {'type': 'loss', 'content': 0.024972526356577873, 'timestamp': '2025-09-10 02:18:33.565954', 'step': 1258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:33.596831', 'step': 1258, 'epoch': 1} {'type': 'loss', 'content': 0.027556994929909706, 'timestamp': '2025-09-10 02:18:33.601277', 'step': 1259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:33.632346', 'step': 1259, 'epoch': 1} {'type': 'loss', 'content': 0.018490461632609367, 'timestamp': '2025-09-10 02:18:33.661045', 'step': 1260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:33.695309', 'step': 1260, 'epoch': 1} {'type': 'loss', 'content': 0.009816362522542477, 'timestamp': '2025-09-10 02:18:33.708052', 'step': 1261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:18:33.750489', 'step': 1261, 'epoch': 1} {'type': 'loss', 'content': 0.016465116292238235, 'timestamp': '2025-09-10 02:18:33.766332', 'step': 1262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:33.799401', 'step': 1262, 'epoch': 1} {'type': 'loss', 'content': 0.001814844785258174, 'timestamp': '2025-09-10 02:18:33.810308', 'step': 1263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:33.845201', 'step': 1263, 'epoch': 1} {'type': 'loss', 'content': 0.013563080690801144, 'timestamp': '2025-09-10 02:18:33.873063', 'step': 1264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:33.908938', 'step': 1264, 'epoch': 1} {'type': 'loss', 'content': 0.016335798427462578, 'timestamp': '2025-09-10 02:18:33.913348', 'step': 1265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:33.946699', 'step': 1265, 'epoch': 1} {'type': 'loss', 'content': 0.01673940010368824, 'timestamp': '2025-09-10 02:18:33.957355', 'step': 1266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:33.991526', 'step': 1266, 'epoch': 1} {'type': 'loss', 'content': 0.033870987594127655, 'timestamp': '2025-09-10 02:18:34.004095', 'step': 1267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:34.038372', 'step': 1267, 'epoch': 1} {'type': 'loss', 'content': 0.005386251490563154, 'timestamp': '2025-09-10 02:18:34.069241', 'step': 1268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:34.107804', 'step': 1268, 'epoch': 1} {'type': 'loss', 'content': 0.030843589454889297, 'timestamp': '2025-09-10 02:18:34.117801', 'step': 1269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:34.151381', 'step': 1269, 'epoch': 1} {'type': 'loss', 'content': 0.00869796983897686, 'timestamp': '2025-09-10 02:18:34.158066', 'step': 1270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:34.189862', 'step': 1270, 'epoch': 1} {'type': 'loss', 'content': 0.046526242047548294, 'timestamp': '2025-09-10 02:18:34.200281', 'step': 1271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 15187581968384}, 'timestamp': '2025-09-10 02:18:34.245614', 'step': 1271, 'epoch': 1} {'type': 'loss', 'content': 0.011616252362728119, 'timestamp': '2025-09-10 02:18:34.284123', 'step': 1272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:18:34.323074', 'step': 1272, 'epoch': 1} {'type': 'loss', 'content': 0.007240879815071821, 'timestamp': '2025-09-10 02:18:34.338246', 'step': 1273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:34.372144', 'step': 1273, 'epoch': 1} {'type': 'loss', 'content': 0.010705935768783092, 'timestamp': '2025-09-10 02:18:34.376353', 'step': 1274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:34.407558', 'step': 1274, 'epoch': 1} {'type': 'loss', 'content': 0.004706122912466526, 'timestamp': '2025-09-10 02:18:34.414541', 'step': 1275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:34.448868', 'step': 1275, 'epoch': 1} {'type': 'loss', 'content': 0.01904475688934326, 'timestamp': '2025-09-10 02:18:34.473491', 'step': 1276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:34.509351', 'step': 1276, 'epoch': 1} {'type': 'loss', 'content': 0.004496569279581308, 'timestamp': '2025-09-10 02:18:34.513801', 'step': 1277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:34.554885', 'step': 1277, 'epoch': 1} {'type': 'loss', 'content': 0.03280925750732422, 'timestamp': '2025-09-10 02:18:34.565678', 'step': 1278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:34.602218', 'step': 1278, 'epoch': 1} {'type': 'loss', 'content': 0.005936585366725922, 'timestamp': '2025-09-10 02:18:34.609147', 'step': 1279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:34.640172', 'step': 1279, 'epoch': 1} {'type': 'loss', 'content': 0.007575146853923798, 'timestamp': '2025-09-10 02:18:34.665352', 'step': 1280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:18:34.696244', 'step': 1280, 'epoch': 1} {'type': 'loss', 'content': 0.012064780108630657, 'timestamp': '2025-09-10 02:18:34.699504', 'step': 1281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:34.732312', 'step': 1281, 'epoch': 1} {'type': 'loss', 'content': 0.012344618327915668, 'timestamp': '2025-09-10 02:18:34.742146', 'step': 1282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:18:34.778396', 'step': 1282, 'epoch': 1} {'type': 'loss', 'content': 0.025485141202807426, 'timestamp': '2025-09-10 02:18:34.792184', 'step': 1283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:34.827800', 'step': 1283, 'epoch': 1} {'type': 'loss', 'content': 0.005437423940747976, 'timestamp': '2025-09-10 02:18:34.855789', 'step': 1284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:34.887426', 'step': 1284, 'epoch': 1} {'type': 'loss', 'content': 0.023022016510367393, 'timestamp': '2025-09-10 02:18:34.891764', 'step': 1285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:34.929544', 'step': 1285, 'epoch': 1} {'type': 'loss', 'content': 0.003953091334551573, 'timestamp': '2025-09-10 02:18:34.941312', 'step': 1286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:34.971935', 'step': 1286, 'epoch': 1} {'type': 'loss', 'content': 0.005520283244550228, 'timestamp': '2025-09-10 02:18:34.983936', 'step': 1287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:35.017990', 'step': 1287, 'epoch': 1} {'type': 'loss', 'content': 0.014434975571930408, 'timestamp': '2025-09-10 02:18:35.051393', 'step': 1288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:35.084410', 'step': 1288, 'epoch': 1} {'type': 'loss', 'content': 0.011467205360531807, 'timestamp': '2025-09-10 02:18:35.096911', 'step': 1289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:35.127795', 'step': 1289, 'epoch': 1} {'type': 'loss', 'content': 0.020779237151145935, 'timestamp': '2025-09-10 02:18:35.134525', 'step': 1290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:35.168761', 'step': 1290, 'epoch': 1} {'type': 'loss', 'content': 0.0018716433551162481, 'timestamp': '2025-09-10 02:18:35.175865', 'step': 1291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:35.206686', 'step': 1291, 'epoch': 1} {'type': 'loss', 'content': 0.0036745467223227024, 'timestamp': '2025-09-10 02:18:35.231575', 'step': 1292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:35.262541', 'step': 1292, 'epoch': 1} {'type': 'loss', 'content': 0.016196925193071365, 'timestamp': '2025-09-10 02:18:35.270330', 'step': 1293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:35.301030', 'step': 1293, 'epoch': 1} {'type': 'loss', 'content': 0.0035647223703563213, 'timestamp': '2025-09-10 02:18:35.311839', 'step': 1294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:35.343584', 'step': 1294, 'epoch': 1} {'type': 'loss', 'content': 0.010751097463071346, 'timestamp': '2025-09-10 02:18:35.346429', 'step': 1295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:35.377073', 'step': 1295, 'epoch': 1} {'type': 'loss', 'content': 0.0385432243347168, 'timestamp': '2025-09-10 02:18:35.405578', 'step': 1296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:18:35.438840', 'step': 1296, 'epoch': 1} {'type': 'loss', 'content': 0.026758210733532906, 'timestamp': '2025-09-10 02:18:35.451961', 'step': 1297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:35.484987', 'step': 1297, 'epoch': 1} {'type': 'loss', 'content': 0.01798836700618267, 'timestamp': '2025-09-10 02:18:35.495753', 'step': 1298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:35.529550', 'step': 1298, 'epoch': 1} {'type': 'loss', 'content': 0.01102465484291315, 'timestamp': '2025-09-10 02:18:35.533877', 'step': 1299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:35.564153', 'step': 1299, 'epoch': 1} {'type': 'loss', 'content': 0.011853739619255066, 'timestamp': '2025-09-10 02:18:35.589056', 'step': 1300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:35.620634', 'step': 1300, 'epoch': 1} {'type': 'loss', 'content': 0.03611797094345093, 'timestamp': '2025-09-10 02:18:35.630245', 'step': 1301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:18:35.670917', 'step': 1301, 'epoch': 1} {'type': 'loss', 'content': 0.0027977568097412586, 'timestamp': '2025-09-10 02:18:35.688020', 'step': 1302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:35.719854', 'step': 1302, 'epoch': 1} {'type': 'loss', 'content': 0.009604268707334995, 'timestamp': '2025-09-10 02:18:35.726938', 'step': 1303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:35.758003', 'step': 1303, 'epoch': 1} {'type': 'loss', 'content': 0.002295356709510088, 'timestamp': '2025-09-10 02:18:35.785829', 'step': 1304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:18:35.822533', 'step': 1304, 'epoch': 1} {'type': 'loss', 'content': 0.008270000107586384, 'timestamp': '2025-09-10 02:18:35.837973', 'step': 1305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:18:35.872182', 'step': 1305, 'epoch': 1} {'type': 'loss', 'content': 0.01986978016793728, 'timestamp': '2025-09-10 02:18:35.875338', 'step': 1306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:35.911087', 'step': 1306, 'epoch': 1} {'type': 'loss', 'content': 0.023896988481283188, 'timestamp': '2025-09-10 02:18:35.921057', 'step': 1307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:35.957355', 'step': 1307, 'epoch': 1} {'type': 'loss', 'content': 0.020381931215524673, 'timestamp': '2025-09-10 02:18:35.991669', 'step': 1308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:36.026271', 'step': 1308, 'epoch': 1} {'type': 'loss', 'content': 0.014767967164516449, 'timestamp': '2025-09-10 02:18:36.031507', 'step': 1309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:36.065762', 'step': 1309, 'epoch': 1} {'type': 'loss', 'content': 0.002422439632937312, 'timestamp': '2025-09-10 02:18:36.070149', 'step': 1310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:36.100810', 'step': 1310, 'epoch': 1} {'type': 'loss', 'content': 0.024649931117892265, 'timestamp': '2025-09-10 02:18:36.108445', 'step': 1311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:36.141430', 'step': 1311, 'epoch': 1} {'type': 'loss', 'content': 0.009912949986755848, 'timestamp': '2025-09-10 02:18:36.172465', 'step': 1312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:36.203763', 'step': 1312, 'epoch': 1} {'type': 'loss', 'content': 0.002744142198935151, 'timestamp': '2025-09-10 02:18:36.212204', 'step': 1313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:36.243525', 'step': 1313, 'epoch': 1} {'type': 'loss', 'content': 0.007335959933698177, 'timestamp': '2025-09-10 02:18:36.255726', 'step': 1314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:36.287489', 'step': 1314, 'epoch': 1} {'type': 'loss', 'content': 0.041926268488168716, 'timestamp': '2025-09-10 02:18:36.294324', 'step': 1315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:36.326188', 'step': 1315, 'epoch': 1} {'type': 'loss', 'content': 0.0036907510366290808, 'timestamp': '2025-09-10 02:18:36.357722', 'step': 1316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:18:36.391305', 'step': 1316, 'epoch': 1} {'type': 'loss', 'content': 0.00216344790533185, 'timestamp': '2025-09-10 02:18:36.404473', 'step': 1317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:18:36.445720', 'step': 1317, 'epoch': 1} {'type': 'loss', 'content': 0.009386607445776463, 'timestamp': '2025-09-10 02:18:36.461893', 'step': 1318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:36.494105', 'step': 1318, 'epoch': 1} {'type': 'loss', 'content': 0.030109494924545288, 'timestamp': '2025-09-10 02:18:36.501386', 'step': 1319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:36.533395', 'step': 1319, 'epoch': 1} {'type': 'loss', 'content': 0.0012107326183468103, 'timestamp': '2025-09-10 02:18:36.566829', 'step': 1320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:36.598400', 'step': 1320, 'epoch': 1} {'type': 'loss', 'content': 0.008304606191813946, 'timestamp': '2025-09-10 02:18:36.602975', 'step': 1321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:36.634882', 'step': 1321, 'epoch': 1} {'type': 'loss', 'content': 0.022128764539957047, 'timestamp': '2025-09-10 02:18:36.642598', 'step': 1322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:36.673862', 'step': 1322, 'epoch': 1} {'type': 'loss', 'content': 0.009982970543205738, 'timestamp': '2025-09-10 02:18:36.686427', 'step': 1323, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:18:46.851890', 'step': 1323, 'epoch': 1} {'type': 'pplx', 'content': 13954997.402758988, 'timestamp': '2025-09-10 02:18:46.854605', 'step': 1323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:46.886451', 'step': 1323, 'epoch': 1} {'type': 'loss', 'content': 0.018629444763064384, 'timestamp': '2025-09-10 02:18:46.920587', 'step': 1324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:46.952098', 'step': 1324, 'epoch': 1} {'type': 'loss', 'content': 0.015395854599773884, 'timestamp': '2025-09-10 02:18:46.960873', 'step': 1325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:46.992652', 'step': 1325, 'epoch': 1} {'type': 'loss', 'content': 0.0029722540639340878, 'timestamp': '2025-09-10 02:18:47.002965', 'step': 1326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:47.036788', 'step': 1326, 'epoch': 1} {'type': 'loss', 'content': 0.0171508826315403, 'timestamp': '2025-09-10 02:18:47.050107', 'step': 1327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:47.081821', 'step': 1327, 'epoch': 1} {'type': 'loss', 'content': 0.004396271891891956, 'timestamp': '2025-09-10 02:18:47.110082', 'step': 1328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:47.141128', 'step': 1328, 'epoch': 1} {'type': 'loss', 'content': 0.03813646361231804, 'timestamp': '2025-09-10 02:18:47.145678', 'step': 1329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:47.177598', 'step': 1329, 'epoch': 1} {'type': 'loss', 'content': 0.005988952703773975, 'timestamp': '2025-09-10 02:18:47.189873', 'step': 1330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:47.224977', 'step': 1330, 'epoch': 1} {'type': 'loss', 'content': 0.03351500257849693, 'timestamp': '2025-09-10 02:18:47.238370', 'step': 1331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:47.271603', 'step': 1331, 'epoch': 1} {'type': 'loss', 'content': 0.003736252663657069, 'timestamp': '2025-09-10 02:18:47.296535', 'step': 1332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:47.328532', 'step': 1332, 'epoch': 1} {'type': 'loss', 'content': 0.03064594976603985, 'timestamp': '2025-09-10 02:18:47.337440', 'step': 1333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:47.369734', 'step': 1333, 'epoch': 1} {'type': 'loss', 'content': 0.02222239412367344, 'timestamp': '2025-09-10 02:18:47.380141', 'step': 1334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:18:47.416111', 'step': 1334, 'epoch': 1} {'type': 'loss', 'content': 0.026622384786605835, 'timestamp': '2025-09-10 02:18:47.429721', 'step': 1335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:47.464492', 'step': 1335, 'epoch': 1} {'type': 'loss', 'content': 0.008432361297309399, 'timestamp': '2025-09-10 02:18:47.498715', 'step': 1336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:47.531910', 'step': 1336, 'epoch': 1} {'type': 'loss', 'content': 0.004825720097869635, 'timestamp': '2025-09-10 02:18:47.534233', 'step': 1337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:47.565732', 'step': 1337, 'epoch': 1} {'type': 'loss', 'content': 0.004051771480590105, 'timestamp': '2025-09-10 02:18:47.572231', 'step': 1338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:47.603586', 'step': 1338, 'epoch': 1} {'type': 'loss', 'content': 0.020291676744818687, 'timestamp': '2025-09-10 02:18:47.610210', 'step': 1339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:47.642064', 'step': 1339, 'epoch': 1} {'type': 'loss', 'content': 0.017118671908974648, 'timestamp': '2025-09-10 02:18:47.674586', 'step': 1340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:47.707026', 'step': 1340, 'epoch': 1} {'type': 'loss', 'content': 0.0032200440764427185, 'timestamp': '2025-09-10 02:18:47.714102', 'step': 1341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:47.746221', 'step': 1341, 'epoch': 1} {'type': 'loss', 'content': 0.0031123815570026636, 'timestamp': '2025-09-10 02:18:47.755884', 'step': 1342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:47.787649', 'step': 1342, 'epoch': 1} {'type': 'loss', 'content': 0.007114849053323269, 'timestamp': '2025-09-10 02:18:47.794652', 'step': 1343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:47.826963', 'step': 1343, 'epoch': 1} {'type': 'loss', 'content': 0.00877163652330637, 'timestamp': '2025-09-10 02:18:47.859375', 'step': 1344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:47.890078', 'step': 1344, 'epoch': 1} {'type': 'loss', 'content': 0.03030526638031006, 'timestamp': '2025-09-10 02:18:47.892273', 'step': 1345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:47.925807', 'step': 1345, 'epoch': 1} {'type': 'loss', 'content': 0.005265057552605867, 'timestamp': '2025-09-10 02:18:47.936141', 'step': 1346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:47.971811', 'step': 1346, 'epoch': 1} {'type': 'loss', 'content': 0.001451778458431363, 'timestamp': '2025-09-10 02:18:47.978351', 'step': 1347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:48.009497', 'step': 1347, 'epoch': 1} {'type': 'loss', 'content': 0.011003616265952587, 'timestamp': '2025-09-10 02:18:48.037639', 'step': 1348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:48.070245', 'step': 1348, 'epoch': 1} {'type': 'loss', 'content': 0.00957377441227436, 'timestamp': '2025-09-10 02:18:48.077376', 'step': 1349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:48.108133', 'step': 1349, 'epoch': 1} {'type': 'loss', 'content': 0.004885104484856129, 'timestamp': '2025-09-10 02:18:48.114983', 'step': 1350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:48.148190', 'step': 1350, 'epoch': 1} {'type': 'loss', 'content': 0.022131670266389847, 'timestamp': '2025-09-10 02:18:48.160564', 'step': 1351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:18:48.197590', 'step': 1351, 'epoch': 1} {'type': 'loss', 'content': 0.004956061951816082, 'timestamp': '2025-09-10 02:18:48.232314', 'step': 1352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:48.265558', 'step': 1352, 'epoch': 1} {'type': 'loss', 'content': 0.018229112029075623, 'timestamp': '2025-09-10 02:18:48.275361', 'step': 1353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:48.306870', 'step': 1353, 'epoch': 1} {'type': 'loss', 'content': 0.010464141145348549, 'timestamp': '2025-09-10 02:18:48.316533', 'step': 1354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:48.347729', 'step': 1354, 'epoch': 1} {'type': 'loss', 'content': 0.0036923617590218782, 'timestamp': '2025-09-10 02:18:48.349819', 'step': 1355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:48.381637', 'step': 1355, 'epoch': 1} {'type': 'loss', 'content': 0.05059584602713585, 'timestamp': '2025-09-10 02:18:48.409372', 'step': 1356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:18:48.441772', 'step': 1356, 'epoch': 1} {'type': 'loss', 'content': 0.005611395929008722, 'timestamp': '2025-09-10 02:18:48.454840', 'step': 1357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:48.486090', 'step': 1357, 'epoch': 1} {'type': 'loss', 'content': 0.00173103844281286, 'timestamp': '2025-09-10 02:18:48.489918', 'step': 1358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:48.521672', 'step': 1358, 'epoch': 1} {'type': 'loss', 'content': 0.00872302707284689, 'timestamp': '2025-09-10 02:18:48.534199', 'step': 1359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:48.565533', 'step': 1359, 'epoch': 1} {'type': 'loss', 'content': 0.003065047785639763, 'timestamp': '2025-09-10 02:18:48.593194', 'step': 1360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:48.624533', 'step': 1360, 'epoch': 1} {'type': 'loss', 'content': 0.007230323273688555, 'timestamp': '2025-09-10 02:18:48.631787', 'step': 1361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:48.663902', 'step': 1361, 'epoch': 1} {'type': 'loss', 'content': 0.012289733625948429, 'timestamp': '2025-09-10 02:18:48.676196', 'step': 1362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:18:48.711977', 'step': 1362, 'epoch': 1} {'type': 'loss', 'content': 0.05497897043824196, 'timestamp': '2025-09-10 02:18:48.725681', 'step': 1363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:48.757658', 'step': 1363, 'epoch': 1} {'type': 'loss', 'content': 0.002912584925070405, 'timestamp': '2025-09-10 02:18:48.788064', 'step': 1364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:18:48.821183', 'step': 1364, 'epoch': 1} {'type': 'loss', 'content': 0.0022109579294919968, 'timestamp': '2025-09-10 02:18:48.834324', 'step': 1365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:48.866771', 'step': 1365, 'epoch': 1} {'type': 'loss', 'content': 0.018029719591140747, 'timestamp': '2025-09-10 02:18:48.874073', 'step': 1366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:48.906286', 'step': 1366, 'epoch': 1} {'type': 'loss', 'content': 0.056414928287267685, 'timestamp': '2025-09-10 02:18:48.912869', 'step': 1367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:48.945028', 'step': 1367, 'epoch': 1} {'type': 'loss', 'content': 0.05040454491972923, 'timestamp': '2025-09-10 02:18:48.975529', 'step': 1368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:49.009208', 'step': 1368, 'epoch': 1} {'type': 'loss', 'content': 0.01217829529196024, 'timestamp': '2025-09-10 02:18:49.013682', 'step': 1369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:49.048154', 'step': 1369, 'epoch': 1} {'type': 'loss', 'content': 0.02744656801223755, 'timestamp': '2025-09-10 02:18:49.061502', 'step': 1370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:49.096528', 'step': 1370, 'epoch': 1} {'type': 'loss', 'content': 0.02190292812883854, 'timestamp': '2025-09-10 02:18:49.106149', 'step': 1371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:18:49.140047', 'step': 1371, 'epoch': 1} {'type': 'loss', 'content': 0.03642702102661133, 'timestamp': '2025-09-10 02:18:49.174250', 'step': 1372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:49.208227', 'step': 1372, 'epoch': 1} {'type': 'loss', 'content': 0.012048700824379921, 'timestamp': '2025-09-10 02:18:49.213934', 'step': 1373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:49.255170', 'step': 1373, 'epoch': 1} {'type': 'loss', 'content': 0.02002662420272827, 'timestamp': '2025-09-10 02:18:49.262645', 'step': 1374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:49.295812', 'step': 1374, 'epoch': 1} {'type': 'loss', 'content': 0.012123959138989449, 'timestamp': '2025-09-10 02:18:49.303233', 'step': 1375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:49.334905', 'step': 1375, 'epoch': 1} {'type': 'loss', 'content': 0.007124970201402903, 'timestamp': '2025-09-10 02:18:49.366230', 'step': 1376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:49.399376', 'step': 1376, 'epoch': 1} {'type': 'loss', 'content': 0.003386021126061678, 'timestamp': '2025-09-10 02:18:49.401904', 'step': 1377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:49.434940', 'step': 1377, 'epoch': 1} {'type': 'loss', 'content': 0.0014372080331668258, 'timestamp': '2025-09-10 02:18:49.444371', 'step': 1378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:49.476646', 'step': 1378, 'epoch': 1} {'type': 'loss', 'content': 0.03508186340332031, 'timestamp': '2025-09-10 02:18:49.482744', 'step': 1379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 624], 'flops': 18509808050496}, 'timestamp': '2025-09-10 02:18:49.535283', 'step': 1379, 'epoch': 1} {'type': 'loss', 'content': 0.011475126259028912, 'timestamp': '2025-09-10 02:18:49.577910', 'step': 1380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:49.614902', 'step': 1380, 'epoch': 1} {'type': 'loss', 'content': 0.0056556230410933495, 'timestamp': '2025-09-10 02:18:49.620965', 'step': 1381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:49.657781', 'step': 1381, 'epoch': 1} {'type': 'loss', 'content': 0.008287766017019749, 'timestamp': '2025-09-10 02:18:49.661791', 'step': 1382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:18:49.704027', 'step': 1382, 'epoch': 1} {'type': 'loss', 'content': 0.02117903158068657, 'timestamp': '2025-09-10 02:18:49.721395', 'step': 1383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:49.754731', 'step': 1383, 'epoch': 1} {'type': 'loss', 'content': 0.022697385400533676, 'timestamp': '2025-09-10 02:18:49.787804', 'step': 1384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:49.824331', 'step': 1384, 'epoch': 1} {'type': 'loss', 'content': 0.010887114331126213, 'timestamp': '2025-09-10 02:18:49.828629', 'step': 1385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:49.861534', 'step': 1385, 'epoch': 1} {'type': 'loss', 'content': 0.012805354781448841, 'timestamp': '2025-09-10 02:18:49.865426', 'step': 1386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:49.898188', 'step': 1386, 'epoch': 1} {'type': 'loss', 'content': 0.01245130505412817, 'timestamp': '2025-09-10 02:18:49.908448', 'step': 1387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:49.939966', 'step': 1387, 'epoch': 1} {'type': 'loss', 'content': 0.028233621269464493, 'timestamp': '2025-09-10 02:18:49.964718', 'step': 1388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:49.997212', 'step': 1388, 'epoch': 1} {'type': 'loss', 'content': 0.02826070412993431, 'timestamp': '2025-09-10 02:18:50.001547', 'step': 1389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:50.033335', 'step': 1389, 'epoch': 1} {'type': 'loss', 'content': 0.009805792011320591, 'timestamp': '2025-09-10 02:18:50.040028', 'step': 1390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:50.071252', 'step': 1390, 'epoch': 1} {'type': 'loss', 'content': 0.005362308118492365, 'timestamp': '2025-09-10 02:18:50.078294', 'step': 1391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:50.111180', 'step': 1391, 'epoch': 1} {'type': 'loss', 'content': 0.013351285830140114, 'timestamp': '2025-09-10 02:18:50.135326', 'step': 1392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:50.167928', 'step': 1392, 'epoch': 1} {'type': 'loss', 'content': 0.02243475615978241, 'timestamp': '2025-09-10 02:18:50.172362', 'step': 1393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:50.210986', 'step': 1393, 'epoch': 1} {'type': 'loss', 'content': 0.014227988198399544, 'timestamp': '2025-09-10 02:18:50.223121', 'step': 1394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:18:50.266730', 'step': 1394, 'epoch': 1} {'type': 'loss', 'content': 0.004071381408721209, 'timestamp': '2025-09-10 02:18:50.282661', 'step': 1395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:50.314994', 'step': 1395, 'epoch': 1} {'type': 'loss', 'content': 0.008621515706181526, 'timestamp': '2025-09-10 02:18:50.345670', 'step': 1396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:50.383033', 'step': 1396, 'epoch': 1} {'type': 'loss', 'content': 0.009817084297537804, 'timestamp': '2025-09-10 02:18:50.385956', 'step': 1397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:50.419718', 'step': 1397, 'epoch': 1} {'type': 'loss', 'content': 0.012866640463471413, 'timestamp': '2025-09-10 02:18:50.426916', 'step': 1398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:50.462427', 'step': 1398, 'epoch': 1} {'type': 'loss', 'content': 0.004508704878389835, 'timestamp': '2025-09-10 02:18:50.466612', 'step': 1399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:18:50.499638', 'step': 1399, 'epoch': 1} {'type': 'loss', 'content': 0.01872037909924984, 'timestamp': '2025-09-10 02:18:50.523484', 'step': 1400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:50.556465', 'step': 1400, 'epoch': 1} {'type': 'loss', 'content': 0.006643320899456739, 'timestamp': '2025-09-10 02:18:50.560693', 'step': 1401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:50.591810', 'step': 1401, 'epoch': 1} {'type': 'loss', 'content': 0.007569948676973581, 'timestamp': '2025-09-10 02:18:50.599186', 'step': 1402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:50.630800', 'step': 1402, 'epoch': 1} {'type': 'loss', 'content': 0.025758620351552963, 'timestamp': '2025-09-10 02:18:50.640631', 'step': 1403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:50.671891', 'step': 1403, 'epoch': 1} {'type': 'loss', 'content': 0.0335959829390049, 'timestamp': '2025-09-10 02:18:50.696605', 'step': 1404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 18035204324480}, 'timestamp': '2025-09-10 02:18:50.745995', 'step': 1404, 'epoch': 1} {'type': 'loss', 'content': 0.030695544555783272, 'timestamp': '2025-09-10 02:18:50.767499', 'step': 1405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:50.799735', 'step': 1405, 'epoch': 1} {'type': 'loss', 'content': 0.0049163768999278545, 'timestamp': '2025-09-10 02:18:50.804055', 'step': 1406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:50.836221', 'step': 1406, 'epoch': 1} {'type': 'loss', 'content': 0.010724040679633617, 'timestamp': '2025-09-10 02:18:50.839963', 'step': 1407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:50.871715', 'step': 1407, 'epoch': 1} {'type': 'loss', 'content': 0.008394693955779076, 'timestamp': '2025-09-10 02:18:50.896887', 'step': 1408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:50.928718', 'step': 1408, 'epoch': 1} {'type': 'loss', 'content': 0.026338692754507065, 'timestamp': '2025-09-10 02:18:50.933342', 'step': 1409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:50.967081', 'step': 1409, 'epoch': 1} {'type': 'loss', 'content': 0.016750004142522812, 'timestamp': '2025-09-10 02:18:50.971288', 'step': 1410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:51.003371', 'step': 1410, 'epoch': 1} {'type': 'loss', 'content': 0.012398646213114262, 'timestamp': '2025-09-10 02:18:51.010725', 'step': 1411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:51.042668', 'step': 1411, 'epoch': 1} {'type': 'loss', 'content': 0.0241679884493351, 'timestamp': '2025-09-10 02:18:51.067747', 'step': 1412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:51.100071', 'step': 1412, 'epoch': 1} {'type': 'loss', 'content': 0.02775205485522747, 'timestamp': '2025-09-10 02:18:51.104368', 'step': 1413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:51.136377', 'step': 1413, 'epoch': 1} {'type': 'loss', 'content': 0.008829674683511257, 'timestamp': '2025-09-10 02:18:51.143509', 'step': 1414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:51.175624', 'step': 1414, 'epoch': 1} {'type': 'loss', 'content': 0.015450743958353996, 'timestamp': '2025-09-10 02:18:51.182906', 'step': 1415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:51.215436', 'step': 1415, 'epoch': 1} {'type': 'loss', 'content': 0.006386533845216036, 'timestamp': '2025-09-10 02:18:51.248305', 'step': 1416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:51.280172', 'step': 1416, 'epoch': 1} {'type': 'loss', 'content': 0.010058706626296043, 'timestamp': '2025-09-10 02:18:51.284942', 'step': 1417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:51.316164', 'step': 1417, 'epoch': 1} {'type': 'loss', 'content': 0.03240646421909332, 'timestamp': '2025-09-10 02:18:51.322735', 'step': 1418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:51.354522', 'step': 1418, 'epoch': 1} {'type': 'loss', 'content': 0.009293629787862301, 'timestamp': '2025-09-10 02:18:51.364958', 'step': 1419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:51.397068', 'step': 1419, 'epoch': 1} {'type': 'loss', 'content': 0.009477603249251842, 'timestamp': '2025-09-10 02:18:51.425134', 'step': 1420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:51.457027', 'step': 1420, 'epoch': 1} {'type': 'loss', 'content': 0.0042470647022128105, 'timestamp': '2025-09-10 02:18:51.462142', 'step': 1421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:51.493350', 'step': 1421, 'epoch': 1} {'type': 'loss', 'content': 0.028168709948658943, 'timestamp': '2025-09-10 02:18:51.500444', 'step': 1422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:51.531912', 'step': 1422, 'epoch': 1} {'type': 'loss', 'content': 0.054098401218652725, 'timestamp': '2025-09-10 02:18:51.538707', 'step': 1423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:51.569998', 'step': 1423, 'epoch': 1} {'type': 'loss', 'content': 0.013340512290596962, 'timestamp': '2025-09-10 02:18:51.594750', 'step': 1424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:51.626137', 'step': 1424, 'epoch': 1} {'type': 'loss', 'content': 0.001269067986868322, 'timestamp': '2025-09-10 02:18:51.631049', 'step': 1425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:51.661884', 'step': 1425, 'epoch': 1} {'type': 'loss', 'content': 0.01629549451172352, 'timestamp': '2025-09-10 02:18:51.669352', 'step': 1426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:18:51.700505', 'step': 1426, 'epoch': 1} {'type': 'loss', 'content': 0.013591033406555653, 'timestamp': '2025-09-10 02:18:51.712492', 'step': 1427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:51.743458', 'step': 1427, 'epoch': 1} {'type': 'loss', 'content': 0.01069872546941042, 'timestamp': '2025-09-10 02:18:51.772110', 'step': 1428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:51.802797', 'step': 1428, 'epoch': 1} {'type': 'loss', 'content': 0.026248564943671227, 'timestamp': '2025-09-10 02:18:51.807208', 'step': 1429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:51.838296', 'step': 1429, 'epoch': 1} {'type': 'loss', 'content': 0.013996967114508152, 'timestamp': '2025-09-10 02:18:51.845290', 'step': 1430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 19459015502528}, 'timestamp': '2025-09-10 02:18:51.901325', 'step': 1430, 'epoch': 1} {'type': 'loss', 'content': 0.01444973610341549, 'timestamp': '2025-09-10 02:18:51.924697', 'step': 1431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:51.957150', 'step': 1431, 'epoch': 1} {'type': 'loss', 'content': 0.01952037401497364, 'timestamp': '2025-09-10 02:18:51.984564', 'step': 1432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:52.015526', 'step': 1432, 'epoch': 1} {'type': 'loss', 'content': 0.02621072344481945, 'timestamp': '2025-09-10 02:18:52.019985', 'step': 1433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:52.050722', 'step': 1433, 'epoch': 1} {'type': 'loss', 'content': 0.019877398386597633, 'timestamp': '2025-09-10 02:18:52.055320', 'step': 1434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:52.086287', 'step': 1434, 'epoch': 1} {'type': 'loss', 'content': 0.008985900320112705, 'timestamp': '2025-09-10 02:18:52.096891', 'step': 1435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:52.128162', 'step': 1435, 'epoch': 1} {'type': 'loss', 'content': 0.006387191358953714, 'timestamp': '2025-09-10 02:18:52.158997', 'step': 1436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:52.189148', 'step': 1436, 'epoch': 1} {'type': 'loss', 'content': 0.008200598880648613, 'timestamp': '2025-09-10 02:18:52.192387', 'step': 1437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:18:52.224886', 'step': 1437, 'epoch': 1} {'type': 'loss', 'content': 0.014574953354895115, 'timestamp': '2025-09-10 02:18:52.228614', 'step': 1438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:52.265452', 'step': 1438, 'epoch': 1} {'type': 'loss', 'content': 0.010290967300534248, 'timestamp': '2025-09-10 02:18:52.272532', 'step': 1439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:52.305412', 'step': 1439, 'epoch': 1} {'type': 'loss', 'content': 0.009290986694395542, 'timestamp': '2025-09-10 02:18:52.336620', 'step': 1440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:52.372568', 'step': 1440, 'epoch': 1} {'type': 'loss', 'content': 0.007599604316055775, 'timestamp': '2025-09-10 02:18:52.380911', 'step': 1441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:52.415184', 'step': 1441, 'epoch': 1} {'type': 'loss', 'content': 0.008472729474306107, 'timestamp': '2025-09-10 02:18:52.421944', 'step': 1442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:52.452994', 'step': 1442, 'epoch': 1} {'type': 'loss', 'content': 0.01178077794611454, 'timestamp': '2025-09-10 02:18:52.460256', 'step': 1443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:52.492503', 'step': 1443, 'epoch': 1} {'type': 'loss', 'content': 0.02008945122361183, 'timestamp': '2025-09-10 02:18:52.520123', 'step': 1444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:52.553628', 'step': 1444, 'epoch': 1} {'type': 'loss', 'content': 0.004093306139111519, 'timestamp': '2025-09-10 02:18:52.556553', 'step': 1445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:52.589207', 'step': 1445, 'epoch': 1} {'type': 'loss', 'content': 0.007239846047013998, 'timestamp': '2025-09-10 02:18:52.595784', 'step': 1446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:52.627497', 'step': 1446, 'epoch': 1} {'type': 'loss', 'content': 0.010106794536113739, 'timestamp': '2025-09-10 02:18:52.636740', 'step': 1447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:18:52.675187', 'step': 1447, 'epoch': 1} {'type': 'loss', 'content': 0.013031134381890297, 'timestamp': '2025-09-10 02:18:52.711997', 'step': 1448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:52.753937', 'step': 1448, 'epoch': 1} {'type': 'loss', 'content': 0.008832174353301525, 'timestamp': '2025-09-10 02:18:52.761978', 'step': 1449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:52.794971', 'step': 1449, 'epoch': 1} {'type': 'loss', 'content': 0.021678507328033447, 'timestamp': '2025-09-10 02:18:52.804407', 'step': 1450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:18:52.834882', 'step': 1450, 'epoch': 1} {'type': 'loss', 'content': 0.006038912571966648, 'timestamp': '2025-09-10 02:18:52.841917', 'step': 1451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:18:52.872336', 'step': 1451, 'epoch': 1} {'type': 'loss', 'content': 0.015430964529514313, 'timestamp': '2025-09-10 02:18:52.903214', 'step': 1452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:52.934017', 'step': 1452, 'epoch': 1} {'type': 'loss', 'content': 0.015627246350049973, 'timestamp': '2025-09-10 02:18:52.939110', 'step': 1453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:52.969456', 'step': 1453, 'epoch': 1} {'type': 'loss', 'content': 0.01219885889440775, 'timestamp': '2025-09-10 02:18:52.982005', 'step': 1454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:53.013431', 'step': 1454, 'epoch': 1} {'type': 'loss', 'content': 0.024581179022789, 'timestamp': '2025-09-10 02:18:53.024459', 'step': 1455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:53.055390', 'step': 1455, 'epoch': 1} {'type': 'loss', 'content': 0.008804569952189922, 'timestamp': '2025-09-10 02:18:53.083745', 'step': 1456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:53.115291', 'step': 1456, 'epoch': 1} {'type': 'loss', 'content': 0.013319587334990501, 'timestamp': '2025-09-10 02:18:53.123250', 'step': 1457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:53.154585', 'step': 1457, 'epoch': 1} {'type': 'loss', 'content': 0.013631954789161682, 'timestamp': '2025-09-10 02:18:53.161717', 'step': 1458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:18:53.194524', 'step': 1458, 'epoch': 1} {'type': 'loss', 'content': 0.012589896097779274, 'timestamp': '2025-09-10 02:18:53.200982', 'step': 1459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:18:53.232868', 'step': 1459, 'epoch': 1} {'type': 'loss', 'content': 0.02879754640161991, 'timestamp': '2025-09-10 02:18:53.257640', 'step': 1460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:53.288577', 'step': 1460, 'epoch': 1} {'type': 'loss', 'content': 0.008828964084386826, 'timestamp': '2025-09-10 02:18:53.293605', 'step': 1461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:18:53.323952', 'step': 1461, 'epoch': 1} {'type': 'loss', 'content': 0.02157404087483883, 'timestamp': '2025-09-10 02:18:53.331730', 'step': 1462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:18:53.362913', 'step': 1462, 'epoch': 1} {'type': 'loss', 'content': 0.02861526980996132, 'timestamp': '2025-09-10 02:18:53.373749', 'step': 1463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:53.404535', 'step': 1463, 'epoch': 1} {'type': 'loss', 'content': 0.0109772440046072, 'timestamp': '2025-09-10 02:18:53.430088', 'step': 1464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:18:53.460834', 'step': 1464, 'epoch': 1} {'type': 'loss', 'content': 0.02909570373594761, 'timestamp': '2025-09-10 02:18:53.463058', 'step': 1465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:18:53.504235', 'step': 1465, 'epoch': 1} {'type': 'loss', 'content': 0.02886452153325081, 'timestamp': '2025-09-10 02:18:53.521575', 'step': 1466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:18:53.560419', 'step': 1466, 'epoch': 1} {'type': 'loss', 'content': 0.013743521645665169, 'timestamp': '2025-09-10 02:18:53.576057', 'step': 1467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:18:53.607609', 'step': 1467, 'epoch': 1} {'type': 'loss', 'content': 0.0125979483127594, 'timestamp': '2025-09-10 02:18:53.635898', 'step': 1468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:18:53.667674', 'step': 1468, 'epoch': 1} {'type': 'loss', 'content': 0.02716805413365364, 'timestamp': '2025-09-10 02:18:53.677321', 'step': 1469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:18:53.718446', 'step': 1469, 'epoch': 1} {'type': 'loss', 'content': 0.025139151141047478, 'timestamp': '2025-09-10 02:18:53.735533', 'step': 1470, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:19:03.879290', 'step': 1470, 'epoch': 1} {'type': 'pplx', 'content': 13626061.914788976, 'timestamp': '2025-09-10 02:19:03.882109', 'step': 1470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:03.912866', 'step': 1470, 'epoch': 1} {'type': 'loss', 'content': 0.017740854993462563, 'timestamp': '2025-09-10 02:19:03.918795', 'step': 1471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 13289167064320}, 'timestamp': '2025-09-10 02:19:03.958443', 'step': 1471, 'epoch': 1} {'type': 'loss', 'content': 0.021945033222436905, 'timestamp': '2025-09-10 02:19:03.995667', 'step': 1472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:04.026813', 'step': 1472, 'epoch': 1} {'type': 'loss', 'content': 0.003880431642755866, 'timestamp': '2025-09-10 02:19:04.031328', 'step': 1473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:04.061204', 'step': 1473, 'epoch': 1} {'type': 'loss', 'content': 0.025182703509926796, 'timestamp': '2025-09-10 02:19:04.071981', 'step': 1474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:04.104926', 'step': 1474, 'epoch': 1} {'type': 'loss', 'content': 0.009050360880792141, 'timestamp': '2025-09-10 02:19:04.117506', 'step': 1475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:04.148809', 'step': 1475, 'epoch': 1} {'type': 'loss', 'content': 0.020268557593226433, 'timestamp': '2025-09-10 02:19:04.181676', 'step': 1476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:04.211493', 'step': 1476, 'epoch': 1} {'type': 'loss', 'content': 0.04309564083814621, 'timestamp': '2025-09-10 02:19:04.213697', 'step': 1477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:04.243933', 'step': 1477, 'epoch': 1} {'type': 'loss', 'content': 0.0122428759932518, 'timestamp': '2025-09-10 02:19:04.256107', 'step': 1478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:04.286555', 'step': 1478, 'epoch': 1} {'type': 'loss', 'content': 0.021157732233405113, 'timestamp': '2025-09-10 02:19:04.293319', 'step': 1479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:04.323838', 'step': 1479, 'epoch': 1} {'type': 'loss', 'content': 0.00686487415805459, 'timestamp': '2025-09-10 02:19:04.356900', 'step': 1480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:04.387730', 'step': 1480, 'epoch': 1} {'type': 'loss', 'content': 0.009640970267355442, 'timestamp': '2025-09-10 02:19:04.395982', 'step': 1481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:04.427699', 'step': 1481, 'epoch': 1} {'type': 'loss', 'content': 0.0068480512127280235, 'timestamp': '2025-09-10 02:19:04.438232', 'step': 1482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:04.469817', 'step': 1482, 'epoch': 1} {'type': 'loss', 'content': 0.002509176731109619, 'timestamp': '2025-09-10 02:19:04.482385', 'step': 1483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:04.511920', 'step': 1483, 'epoch': 1} {'type': 'loss', 'content': 0.0024369838647544384, 'timestamp': '2025-09-10 02:19:04.539793', 'step': 1484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:04.570613', 'step': 1484, 'epoch': 1} {'type': 'loss', 'content': 0.012398682534694672, 'timestamp': '2025-09-10 02:19:04.572544', 'step': 1485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:04.603140', 'step': 1485, 'epoch': 1} {'type': 'loss', 'content': 0.011835220269858837, 'timestamp': '2025-09-10 02:19:04.607851', 'step': 1486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:04.638014', 'step': 1486, 'epoch': 1} {'type': 'loss', 'content': 0.004558969754725695, 'timestamp': '2025-09-10 02:19:04.641892', 'step': 1487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:04.672545', 'step': 1487, 'epoch': 1} {'type': 'loss', 'content': 0.01352360937744379, 'timestamp': '2025-09-10 02:19:04.697856', 'step': 1488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:04.728706', 'step': 1488, 'epoch': 1} {'type': 'loss', 'content': 0.00836183037608862, 'timestamp': '2025-09-10 02:19:04.730897', 'step': 1489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:19:04.765809', 'step': 1489, 'epoch': 1} {'type': 'loss', 'content': 0.0033379762899130583, 'timestamp': '2025-09-10 02:19:04.779844', 'step': 1490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:19:04.811017', 'step': 1490, 'epoch': 1} {'type': 'loss', 'content': 0.006920557469129562, 'timestamp': '2025-09-10 02:19:04.813361', 'step': 1491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:04.844166', 'step': 1491, 'epoch': 1} {'type': 'loss', 'content': 0.018455183133482933, 'timestamp': '2025-09-10 02:19:04.872005', 'step': 1492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:04.903125', 'step': 1492, 'epoch': 1} {'type': 'loss', 'content': 0.020488440990447998, 'timestamp': '2025-09-10 02:19:04.911072', 'step': 1493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:04.941252', 'step': 1493, 'epoch': 1} {'type': 'loss', 'content': 0.017999647185206413, 'timestamp': '2025-09-10 02:19:04.949128', 'step': 1494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:04.979967', 'step': 1494, 'epoch': 1} {'type': 'loss', 'content': 0.02349008433520794, 'timestamp': '2025-09-10 02:19:04.983658', 'step': 1495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:05.016393', 'step': 1495, 'epoch': 1} {'type': 'loss', 'content': 0.006956641562283039, 'timestamp': '2025-09-10 02:19:05.047311', 'step': 1496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:05.077419', 'step': 1496, 'epoch': 1} {'type': 'loss', 'content': 0.012771239504218102, 'timestamp': '2025-09-10 02:19:05.085768', 'step': 1497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:05.116904', 'step': 1497, 'epoch': 1} {'type': 'loss', 'content': 0.028537657111883163, 'timestamp': '2025-09-10 02:19:05.124157', 'step': 1498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:05.153637', 'step': 1498, 'epoch': 1} {'type': 'loss', 'content': 0.0020859253127127886, 'timestamp': '2025-09-10 02:19:05.156458', 'step': 1499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:05.186926', 'step': 1499, 'epoch': 1} {'type': 'loss', 'content': 0.019206488505005836, 'timestamp': '2025-09-10 02:19:05.212161', 'step': 1500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1500', 'timestamp': '2025-09-10 02:19:09.902482', 'step': 1500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:09.935075', 'step': 1500, 'epoch': 1} {'type': 'loss', 'content': 0.010249263606965542, 'timestamp': '2025-09-10 02:19:09.938233', 'step': 1501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:09.970383', 'step': 1501, 'epoch': 1} {'type': 'loss', 'content': 0.024660227820277214, 'timestamp': '2025-09-10 02:19:09.979634', 'step': 1502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:10.014194', 'step': 1502, 'epoch': 1} {'type': 'loss', 'content': 0.013713826425373554, 'timestamp': '2025-09-10 02:19:10.021249', 'step': 1503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:19:10.063827', 'step': 1503, 'epoch': 1} {'type': 'loss', 'content': 0.006658419966697693, 'timestamp': '2025-09-10 02:19:10.102108', 'step': 1504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:10.133383', 'step': 1504, 'epoch': 1} {'type': 'loss', 'content': 0.01818818412721157, 'timestamp': '2025-09-10 02:19:10.137431', 'step': 1505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:10.169156', 'step': 1505, 'epoch': 1} {'type': 'loss', 'content': 0.023030122742056847, 'timestamp': '2025-09-10 02:19:10.176375', 'step': 1506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:10.208117', 'step': 1506, 'epoch': 1} {'type': 'loss', 'content': 0.018940243870019913, 'timestamp': '2025-09-10 02:19:10.212098', 'step': 1507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:19:10.250410', 'step': 1507, 'epoch': 1} {'type': 'loss', 'content': 0.006142920348793268, 'timestamp': '2025-09-10 02:19:10.287451', 'step': 1508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:10.319677', 'step': 1508, 'epoch': 1} {'type': 'loss', 'content': 0.009842773899435997, 'timestamp': '2025-09-10 02:19:10.327667', 'step': 1509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:10.357940', 'step': 1509, 'epoch': 1} {'type': 'loss', 'content': 0.018183773383498192, 'timestamp': '2025-09-10 02:19:10.365543', 'step': 1510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:10.395927', 'step': 1510, 'epoch': 1} {'type': 'loss', 'content': 0.007604293525218964, 'timestamp': '2025-09-10 02:19:10.402947', 'step': 1511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:10.433895', 'step': 1511, 'epoch': 1} {'type': 'loss', 'content': 0.008296381682157516, 'timestamp': '2025-09-10 02:19:10.461473', 'step': 1512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:10.491791', 'step': 1512, 'epoch': 1} {'type': 'loss', 'content': 0.004962913691997528, 'timestamp': '2025-09-10 02:19:10.496950', 'step': 1513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:10.528647', 'step': 1513, 'epoch': 1} {'type': 'loss', 'content': 0.006845235824584961, 'timestamp': '2025-09-10 02:19:10.536012', 'step': 1514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:10.566685', 'step': 1514, 'epoch': 1} {'type': 'loss', 'content': 0.00035365772782824934, 'timestamp': '2025-09-10 02:19:10.569706', 'step': 1515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:10.600963', 'step': 1515, 'epoch': 1} {'type': 'loss', 'content': 0.017009198665618896, 'timestamp': '2025-09-10 02:19:10.633604', 'step': 1516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:10.665336', 'step': 1516, 'epoch': 1} {'type': 'loss', 'content': 0.011472431942820549, 'timestamp': '2025-09-10 02:19:10.669734', 'step': 1517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:10.700629', 'step': 1517, 'epoch': 1} {'type': 'loss', 'content': 0.0018043555319309235, 'timestamp': '2025-09-10 02:19:10.710367', 'step': 1518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:10.741714', 'step': 1518, 'epoch': 1} {'type': 'loss', 'content': 0.012901760637760162, 'timestamp': '2025-09-10 02:19:10.754161', 'step': 1519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:10.786107', 'step': 1519, 'epoch': 1} {'type': 'loss', 'content': 0.003726641181856394, 'timestamp': '2025-09-10 02:19:10.813635', 'step': 1520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:10.844031', 'step': 1520, 'epoch': 1} {'type': 'loss', 'content': 0.004837970249354839, 'timestamp': '2025-09-10 02:19:10.848890', 'step': 1521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:10.880087', 'step': 1521, 'epoch': 1} {'type': 'loss', 'content': 0.027267929166555405, 'timestamp': '2025-09-10 02:19:10.883803', 'step': 1522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:10.914418', 'step': 1522, 'epoch': 1} {'type': 'loss', 'content': 0.02369379624724388, 'timestamp': '2025-09-10 02:19:10.921907', 'step': 1523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:10.957545', 'step': 1523, 'epoch': 1} {'type': 'loss', 'content': 0.0018621442141011357, 'timestamp': '2025-09-10 02:19:10.985654', 'step': 1524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:11.017915', 'step': 1524, 'epoch': 1} {'type': 'loss', 'content': 0.01912684179842472, 'timestamp': '2025-09-10 02:19:11.022677', 'step': 1525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:19:11.056726', 'step': 1525, 'epoch': 1} {'type': 'loss', 'content': 0.003869327250868082, 'timestamp': '2025-09-10 02:19:11.059031', 'step': 1526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:19:11.097504', 'step': 1526, 'epoch': 1} {'type': 'loss', 'content': 0.0017400800716131926, 'timestamp': '2025-09-10 02:19:11.113340', 'step': 1527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:11.145937', 'step': 1527, 'epoch': 1} {'type': 'loss', 'content': 0.0031910340767353773, 'timestamp': '2025-09-10 02:19:11.178488', 'step': 1528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:11.209726', 'step': 1528, 'epoch': 1} {'type': 'loss', 'content': 0.00998393353074789, 'timestamp': '2025-09-10 02:19:11.214348', 'step': 1529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:11.245050', 'step': 1529, 'epoch': 1} {'type': 'loss', 'content': 0.007462826557457447, 'timestamp': '2025-09-10 02:19:11.252114', 'step': 1530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:11.282679', 'step': 1530, 'epoch': 1} {'type': 'loss', 'content': 0.009829898364841938, 'timestamp': '2025-09-10 02:19:11.293519', 'step': 1531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:11.324112', 'step': 1531, 'epoch': 1} {'type': 'loss', 'content': 0.011728269048035145, 'timestamp': '2025-09-10 02:19:11.352538', 'step': 1532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:19:11.391942', 'step': 1532, 'epoch': 1} {'type': 'loss', 'content': 0.009979200549423695, 'timestamp': '2025-09-10 02:19:11.408936', 'step': 1533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:11.440144', 'step': 1533, 'epoch': 1} {'type': 'loss', 'content': 0.009194576181471348, 'timestamp': '2025-09-10 02:19:11.450196', 'step': 1534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:11.482122', 'step': 1534, 'epoch': 1} {'type': 'loss', 'content': 0.0038809494581073523, 'timestamp': '2025-09-10 02:19:11.485890', 'step': 1535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:11.516810', 'step': 1535, 'epoch': 1} {'type': 'loss', 'content': 0.04445614293217659, 'timestamp': '2025-09-10 02:19:11.544585', 'step': 1536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:11.575947', 'step': 1536, 'epoch': 1} {'type': 'loss', 'content': 0.0013619901146739721, 'timestamp': '2025-09-10 02:19:11.578278', 'step': 1537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:11.609705', 'step': 1537, 'epoch': 1} {'type': 'loss', 'content': 0.008444820530712605, 'timestamp': '2025-09-10 02:19:11.613684', 'step': 1538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:11.645949', 'step': 1538, 'epoch': 1} {'type': 'loss', 'content': 0.010615772567689419, 'timestamp': '2025-09-10 02:19:11.655699', 'step': 1539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:11.686968', 'step': 1539, 'epoch': 1} {'type': 'loss', 'content': 0.018414005637168884, 'timestamp': '2025-09-10 02:19:11.715405', 'step': 1540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:11.746396', 'step': 1540, 'epoch': 1} {'type': 'loss', 'content': 0.016833599656820297, 'timestamp': '2025-09-10 02:19:11.748965', 'step': 1541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:11.780055', 'step': 1541, 'epoch': 1} {'type': 'loss', 'content': 0.021267401054501534, 'timestamp': '2025-09-10 02:19:11.790787', 'step': 1542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:11.820770', 'step': 1542, 'epoch': 1} {'type': 'loss', 'content': 0.0016013866988942027, 'timestamp': '2025-09-10 02:19:11.827864', 'step': 1543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:11.859009', 'step': 1543, 'epoch': 1} {'type': 'loss', 'content': 0.002547146985307336, 'timestamp': '2025-09-10 02:19:11.889785', 'step': 1544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:19:11.923472', 'step': 1544, 'epoch': 1} {'type': 'loss', 'content': 0.0054414160549640656, 'timestamp': '2025-09-10 02:19:11.936361', 'step': 1545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:11.967739', 'step': 1545, 'epoch': 1} {'type': 'loss', 'content': 0.003947163466364145, 'timestamp': '2025-09-10 02:19:11.974295', 'step': 1546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:12.005187', 'step': 1546, 'epoch': 1} {'type': 'loss', 'content': 0.011945655569434166, 'timestamp': '2025-09-10 02:19:12.009348', 'step': 1547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:12.043937', 'step': 1547, 'epoch': 1} {'type': 'loss', 'content': 0.024208705872297287, 'timestamp': '2025-09-10 02:19:12.074259', 'step': 1548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:12.106578', 'step': 1548, 'epoch': 1} {'type': 'loss', 'content': 0.0040494343265891075, 'timestamp': '2025-09-10 02:19:12.111391', 'step': 1549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:12.143234', 'step': 1549, 'epoch': 1} {'type': 'loss', 'content': 0.0034531753044575453, 'timestamp': '2025-09-10 02:19:12.147564', 'step': 1550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:12.178742', 'step': 1550, 'epoch': 1} {'type': 'loss', 'content': 0.006650130730122328, 'timestamp': '2025-09-10 02:19:12.189561', 'step': 1551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:12.220244', 'step': 1551, 'epoch': 1} {'type': 'loss', 'content': 0.015004181303083897, 'timestamp': '2025-09-10 02:19:12.245449', 'step': 1552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:12.276928', 'step': 1552, 'epoch': 1} {'type': 'loss', 'content': 0.04160107299685478, 'timestamp': '2025-09-10 02:19:12.282194', 'step': 1553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:12.317576', 'step': 1553, 'epoch': 1} {'type': 'loss', 'content': 0.0034997588954865932, 'timestamp': '2025-09-10 02:19:12.324532', 'step': 1554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:12.361708', 'step': 1554, 'epoch': 1} {'type': 'loss', 'content': 0.032703883945941925, 'timestamp': '2025-09-10 02:19:12.365640', 'step': 1555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:12.402175', 'step': 1555, 'epoch': 1} {'type': 'loss', 'content': 0.0007587299915030599, 'timestamp': '2025-09-10 02:19:12.435646', 'step': 1556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:12.472372', 'step': 1556, 'epoch': 1} {'type': 'loss', 'content': 0.01702108420431614, 'timestamp': '2025-09-10 02:19:12.477555', 'step': 1557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:12.512655', 'step': 1557, 'epoch': 1} {'type': 'loss', 'content': 0.03824358060956001, 'timestamp': '2025-09-10 02:19:12.520053', 'step': 1558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:12.554768', 'step': 1558, 'epoch': 1} {'type': 'loss', 'content': 0.04590751603245735, 'timestamp': '2025-09-10 02:19:12.561526', 'step': 1559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:12.593528', 'step': 1559, 'epoch': 1} {'type': 'loss', 'content': 0.0073296381160616875, 'timestamp': '2025-09-10 02:19:12.621090', 'step': 1560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:12.656359', 'step': 1560, 'epoch': 1} {'type': 'loss', 'content': 0.00989292562007904, 'timestamp': '2025-09-10 02:19:12.665182', 'step': 1561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:12.698238', 'step': 1561, 'epoch': 1} {'type': 'loss', 'content': 0.004150025546550751, 'timestamp': '2025-09-10 02:19:12.710416', 'step': 1562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:12.743156', 'step': 1562, 'epoch': 1} {'type': 'loss', 'content': 0.005096559878438711, 'timestamp': '2025-09-10 02:19:12.750017', 'step': 1563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:12.783012', 'step': 1563, 'epoch': 1} {'type': 'loss', 'content': 0.010926149785518646, 'timestamp': '2025-09-10 02:19:12.808126', 'step': 1564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:12.838762', 'step': 1564, 'epoch': 1} {'type': 'loss', 'content': 0.0010295318206772208, 'timestamp': '2025-09-10 02:19:12.840914', 'step': 1565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:12.871398', 'step': 1565, 'epoch': 1} {'type': 'loss', 'content': 0.03598492965102196, 'timestamp': '2025-09-10 02:19:12.878101', 'step': 1566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:12.908645', 'step': 1566, 'epoch': 1} {'type': 'loss', 'content': 0.020379869267344475, 'timestamp': '2025-09-10 02:19:12.919096', 'step': 1567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:12.950808', 'step': 1567, 'epoch': 1} {'type': 'loss', 'content': 0.014766373671591282, 'timestamp': '2025-09-10 02:19:12.979285', 'step': 1568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:19:13.013184', 'step': 1568, 'epoch': 1} {'type': 'loss', 'content': 0.004263672977685928, 'timestamp': '2025-09-10 02:19:13.025928', 'step': 1569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:13.059028', 'step': 1569, 'epoch': 1} {'type': 'loss', 'content': 0.003036454552784562, 'timestamp': '2025-09-10 02:19:13.061593', 'step': 1570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:13.092133', 'step': 1570, 'epoch': 1} {'type': 'loss', 'content': 0.02348274551331997, 'timestamp': '2025-09-10 02:19:13.099943', 'step': 1571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:19:13.133967', 'step': 1571, 'epoch': 1} {'type': 'loss', 'content': 0.004423712845891714, 'timestamp': '2025-09-10 02:19:13.168482', 'step': 1572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:13.201480', 'step': 1572, 'epoch': 1} {'type': 'loss', 'content': 0.015006057918071747, 'timestamp': '2025-09-10 02:19:13.210174', 'step': 1573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:13.241404', 'step': 1573, 'epoch': 1} {'type': 'loss', 'content': 0.01116950623691082, 'timestamp': '2025-09-10 02:19:13.245612', 'step': 1574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:13.277344', 'step': 1574, 'epoch': 1} {'type': 'loss', 'content': 0.006449028849601746, 'timestamp': '2025-09-10 02:19:13.281195', 'step': 1575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:13.312545', 'step': 1575, 'epoch': 1} {'type': 'loss', 'content': 0.013472805730998516, 'timestamp': '2025-09-10 02:19:13.340182', 'step': 1576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:13.371388', 'step': 1576, 'epoch': 1} {'type': 'loss', 'content': 0.043971676379442215, 'timestamp': '2025-09-10 02:19:13.373639', 'step': 1577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:13.405054', 'step': 1577, 'epoch': 1} {'type': 'loss', 'content': 0.0046881563030183315, 'timestamp': '2025-09-10 02:19:13.411863', 'step': 1578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:19:13.442366', 'step': 1578, 'epoch': 1} {'type': 'loss', 'content': 0.0024195548612624407, 'timestamp': '2025-09-10 02:19:13.444544', 'step': 1579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:13.475562', 'step': 1579, 'epoch': 1} {'type': 'loss', 'content': 0.01876218058168888, 'timestamp': '2025-09-10 02:19:13.503355', 'step': 1580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:13.535008', 'step': 1580, 'epoch': 1} {'type': 'loss', 'content': 0.012913152575492859, 'timestamp': '2025-09-10 02:19:13.540082', 'step': 1581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:13.570603', 'step': 1581, 'epoch': 1} {'type': 'loss', 'content': 0.0017830540891736746, 'timestamp': '2025-09-10 02:19:13.577270', 'step': 1582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:13.610802', 'step': 1582, 'epoch': 1} {'type': 'loss', 'content': 0.05414802208542824, 'timestamp': '2025-09-10 02:19:13.622355', 'step': 1583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:13.653529', 'step': 1583, 'epoch': 1} {'type': 'loss', 'content': 0.01702873595058918, 'timestamp': '2025-09-10 02:19:13.681039', 'step': 1584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:13.712174', 'step': 1584, 'epoch': 1} {'type': 'loss', 'content': 0.02116026170551777, 'timestamp': '2025-09-10 02:19:13.719993', 'step': 1585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:13.751245', 'step': 1585, 'epoch': 1} {'type': 'loss', 'content': 0.006673253607004881, 'timestamp': '2025-09-10 02:19:13.763423', 'step': 1586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:13.795855', 'step': 1586, 'epoch': 1} {'type': 'loss', 'content': 0.004170980304479599, 'timestamp': '2025-09-10 02:19:13.802709', 'step': 1587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:13.835247', 'step': 1587, 'epoch': 1} {'type': 'loss', 'content': 0.013706117868423462, 'timestamp': '2025-09-10 02:19:13.865659', 'step': 1588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:13.897377', 'step': 1588, 'epoch': 1} {'type': 'loss', 'content': 0.0012754879426211119, 'timestamp': '2025-09-10 02:19:13.902337', 'step': 1589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:13.934244', 'step': 1589, 'epoch': 1} {'type': 'loss', 'content': 0.002866287948563695, 'timestamp': '2025-09-10 02:19:13.945068', 'step': 1590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:13.975845', 'step': 1590, 'epoch': 1} {'type': 'loss', 'content': 0.007324092090129852, 'timestamp': '2025-09-10 02:19:13.985907', 'step': 1591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:14.017461', 'step': 1591, 'epoch': 1} {'type': 'loss', 'content': 0.02281094528734684, 'timestamp': '2025-09-10 02:19:14.045784', 'step': 1592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:19:14.077329', 'step': 1592, 'epoch': 1} {'type': 'loss', 'content': 0.005747564602643251, 'timestamp': '2025-09-10 02:19:14.079549', 'step': 1593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:14.110416', 'step': 1593, 'epoch': 1} {'type': 'loss', 'content': 0.00610779132694006, 'timestamp': '2025-09-10 02:19:14.122081', 'step': 1594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:14.153333', 'step': 1594, 'epoch': 1} {'type': 'loss', 'content': 0.02713647671043873, 'timestamp': '2025-09-10 02:19:14.160177', 'step': 1595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:14.190485', 'step': 1595, 'epoch': 1} {'type': 'loss', 'content': 0.023267099633812904, 'timestamp': '2025-09-10 02:19:14.214370', 'step': 1596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:14.245070', 'step': 1596, 'epoch': 1} {'type': 'loss', 'content': 0.0023487545549869537, 'timestamp': '2025-09-10 02:19:14.252377', 'step': 1597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:14.284544', 'step': 1597, 'epoch': 1} {'type': 'loss', 'content': 0.0011892582988366485, 'timestamp': '2025-09-10 02:19:14.294391', 'step': 1598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:14.325831', 'step': 1598, 'epoch': 1} {'type': 'loss', 'content': 0.0046529993414878845, 'timestamp': '2025-09-10 02:19:14.333072', 'step': 1599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:19:14.367251', 'step': 1599, 'epoch': 1} {'type': 'loss', 'content': 0.0007145693525671959, 'timestamp': '2025-09-10 02:19:14.401520', 'step': 1600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:14.434584', 'step': 1600, 'epoch': 1} {'type': 'loss', 'content': 0.030808603391051292, 'timestamp': '2025-09-10 02:19:14.436496', 'step': 1601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:14.468690', 'step': 1601, 'epoch': 1} {'type': 'loss', 'content': 0.029920728877186775, 'timestamp': '2025-09-10 02:19:14.480403', 'step': 1602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:14.511771', 'step': 1602, 'epoch': 1} {'type': 'loss', 'content': 0.010802625678479671, 'timestamp': '2025-09-10 02:19:14.519362', 'step': 1603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:14.550633', 'step': 1603, 'epoch': 1} {'type': 'loss', 'content': 0.010367213748395443, 'timestamp': '2025-09-10 02:19:14.583769', 'step': 1604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:14.614780', 'step': 1604, 'epoch': 1} {'type': 'loss', 'content': 0.004078761674463749, 'timestamp': '2025-09-10 02:19:14.619101', 'step': 1605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:14.650662', 'step': 1605, 'epoch': 1} {'type': 'loss', 'content': 0.01048226747661829, 'timestamp': '2025-09-10 02:19:14.657463', 'step': 1606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:14.689199', 'step': 1606, 'epoch': 1} {'type': 'loss', 'content': 0.020741861313581467, 'timestamp': '2025-09-10 02:19:14.696470', 'step': 1607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:14.727985', 'step': 1607, 'epoch': 1} {'type': 'loss', 'content': 0.02420172281563282, 'timestamp': '2025-09-10 02:19:14.760321', 'step': 1608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:14.791127', 'step': 1608, 'epoch': 1} {'type': 'loss', 'content': 0.011232390999794006, 'timestamp': '2025-09-10 02:19:14.793190', 'step': 1609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:14.824541', 'step': 1609, 'epoch': 1} {'type': 'loss', 'content': 0.025039060041308403, 'timestamp': '2025-09-10 02:19:14.831385', 'step': 1610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:14.862592', 'step': 1610, 'epoch': 1} {'type': 'loss', 'content': 0.010274732485413551, 'timestamp': '2025-09-10 02:19:14.869832', 'step': 1611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:14.901580', 'step': 1611, 'epoch': 1} {'type': 'loss', 'content': 0.007137875538319349, 'timestamp': '2025-09-10 02:19:14.929641', 'step': 1612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:19:14.961939', 'step': 1612, 'epoch': 1} {'type': 'loss', 'content': 0.006099893245846033, 'timestamp': '2025-09-10 02:19:14.974929', 'step': 1613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:19:15.013784', 'step': 1613, 'epoch': 1} {'type': 'loss', 'content': 0.013428665697574615, 'timestamp': '2025-09-10 02:19:15.029699', 'step': 1614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:15.061526', 'step': 1614, 'epoch': 1} {'type': 'loss', 'content': 0.02884194441139698, 'timestamp': '2025-09-10 02:19:15.068318', 'step': 1615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:15.099983', 'step': 1615, 'epoch': 1} {'type': 'loss', 'content': 0.03412342816591263, 'timestamp': '2025-09-10 02:19:15.130601', 'step': 1616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:15.162345', 'step': 1616, 'epoch': 1} {'type': 'loss', 'content': 0.016644364222884178, 'timestamp': '2025-09-10 02:19:15.166419', 'step': 1617, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:19:25.242216', 'step': 1617, 'epoch': 1} {'type': 'pplx', 'content': 14254475.265608242, 'timestamp': '2025-09-10 02:19:25.257155', 'step': 1617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:25.296379', 'step': 1617, 'epoch': 1} {'type': 'loss', 'content': 0.0030304626561701298, 'timestamp': '2025-09-10 02:19:25.299826', 'step': 1618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:25.332126', 'step': 1618, 'epoch': 1} {'type': 'loss', 'content': 0.0015295592602342367, 'timestamp': '2025-09-10 02:19:25.343829', 'step': 1619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:25.375724', 'step': 1619, 'epoch': 1} {'type': 'loss', 'content': 0.010093179531395435, 'timestamp': '2025-09-10 02:19:25.403746', 'step': 1620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:25.442447', 'step': 1620, 'epoch': 1} {'type': 'loss', 'content': 0.04103449359536171, 'timestamp': '2025-09-10 02:19:25.447594', 'step': 1621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:25.480406', 'step': 1621, 'epoch': 1} {'type': 'loss', 'content': 0.030527640134096146, 'timestamp': '2025-09-10 02:19:25.492574', 'step': 1622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:25.523519', 'step': 1622, 'epoch': 1} {'type': 'loss', 'content': 0.03347934037446976, 'timestamp': '2025-09-10 02:19:25.531231', 'step': 1623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:25.561588', 'step': 1623, 'epoch': 1} {'type': 'loss', 'content': 0.005834028124809265, 'timestamp': '2025-09-10 02:19:25.590191', 'step': 1624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:25.623761', 'step': 1624, 'epoch': 1} {'type': 'loss', 'content': 0.011887645348906517, 'timestamp': '2025-09-10 02:19:25.627883', 'step': 1625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:25.660861', 'step': 1625, 'epoch': 1} {'type': 'loss', 'content': 0.020918427035212517, 'timestamp': '2025-09-10 02:19:25.671539', 'step': 1626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:25.707325', 'step': 1626, 'epoch': 1} {'type': 'loss', 'content': 0.004566980060189962, 'timestamp': '2025-09-10 02:19:25.713071', 'step': 1627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:25.744771', 'step': 1627, 'epoch': 1} {'type': 'loss', 'content': 0.01930670067667961, 'timestamp': '2025-09-10 02:19:25.769696', 'step': 1628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:25.800237', 'step': 1628, 'epoch': 1} {'type': 'loss', 'content': 0.01942528784275055, 'timestamp': '2025-09-10 02:19:25.804804', 'step': 1629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:25.835511', 'step': 1629, 'epoch': 1} {'type': 'loss', 'content': 0.012651464901864529, 'timestamp': '2025-09-10 02:19:25.845581', 'step': 1630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:25.885732', 'step': 1630, 'epoch': 1} {'type': 'loss', 'content': 0.028396448120474815, 'timestamp': '2025-09-10 02:19:25.889920', 'step': 1631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:25.924490', 'step': 1631, 'epoch': 1} {'type': 'loss', 'content': 0.010911746881902218, 'timestamp': '2025-09-10 02:19:25.955142', 'step': 1632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:25.986806', 'step': 1632, 'epoch': 1} {'type': 'loss', 'content': 0.031062575057148933, 'timestamp': '2025-09-10 02:19:25.991159', 'step': 1633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:26.025471', 'step': 1633, 'epoch': 1} {'type': 'loss', 'content': 0.010253122076392174, 'timestamp': '2025-09-10 02:19:26.034930', 'step': 1634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:26.067458', 'step': 1634, 'epoch': 1} {'type': 'loss', 'content': 0.015442321076989174, 'timestamp': '2025-09-10 02:19:26.071192', 'step': 1635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:26.103396', 'step': 1635, 'epoch': 1} {'type': 'loss', 'content': 0.004601712804287672, 'timestamp': '2025-09-10 02:19:26.128461', 'step': 1636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:26.169924', 'step': 1636, 'epoch': 1} {'type': 'loss', 'content': 0.006141430698335171, 'timestamp': '2025-09-10 02:19:26.174411', 'step': 1637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:26.211983', 'step': 1637, 'epoch': 1} {'type': 'loss', 'content': 0.004029339645057917, 'timestamp': '2025-09-10 02:19:26.215841', 'step': 1638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:26.247780', 'step': 1638, 'epoch': 1} {'type': 'loss', 'content': 0.010412991046905518, 'timestamp': '2025-09-10 02:19:26.252030', 'step': 1639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:26.283809', 'step': 1639, 'epoch': 1} {'type': 'loss', 'content': 0.013454841449856758, 'timestamp': '2025-09-10 02:19:26.312179', 'step': 1640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:26.344145', 'step': 1640, 'epoch': 1} {'type': 'loss', 'content': 0.02152983471751213, 'timestamp': '2025-09-10 02:19:26.351604', 'step': 1641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:26.392487', 'step': 1641, 'epoch': 1} {'type': 'loss', 'content': 0.006329267751425505, 'timestamp': '2025-09-10 02:19:26.399247', 'step': 1642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:26.437367', 'step': 1642, 'epoch': 1} {'type': 'loss', 'content': 0.01075258944183588, 'timestamp': '2025-09-10 02:19:26.449101', 'step': 1643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:26.487476', 'step': 1643, 'epoch': 1} {'type': 'loss', 'content': 0.0039253514260053635, 'timestamp': '2025-09-10 02:19:26.515046', 'step': 1644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:26.556068', 'step': 1644, 'epoch': 1} {'type': 'loss', 'content': 0.012616248801350594, 'timestamp': '2025-09-10 02:19:26.560104', 'step': 1645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:26.597052', 'step': 1645, 'epoch': 1} {'type': 'loss', 'content': 0.019528865814208984, 'timestamp': '2025-09-10 02:19:26.603995', 'step': 1646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:26.638664', 'step': 1646, 'epoch': 1} {'type': 'loss', 'content': 0.0033076356630772352, 'timestamp': '2025-09-10 02:19:26.646191', 'step': 1647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:19:26.681923', 'step': 1647, 'epoch': 1} {'type': 'loss', 'content': 0.01205162238329649, 'timestamp': '2025-09-10 02:19:26.716520', 'step': 1648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:26.751236', 'step': 1648, 'epoch': 1} {'type': 'loss', 'content': 0.020559037104249, 'timestamp': '2025-09-10 02:19:26.753290', 'step': 1649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:26.784471', 'step': 1649, 'epoch': 1} {'type': 'loss', 'content': 0.013713809661567211, 'timestamp': '2025-09-10 02:19:26.792067', 'step': 1650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:26.824240', 'step': 1650, 'epoch': 1} {'type': 'loss', 'content': 0.026908008381724358, 'timestamp': '2025-09-10 02:19:26.831139', 'step': 1651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:26.864818', 'step': 1651, 'epoch': 1} {'type': 'loss', 'content': 0.019599396735429764, 'timestamp': '2025-09-10 02:19:26.891705', 'step': 1652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:19:26.928007', 'step': 1652, 'epoch': 1} {'type': 'loss', 'content': 0.02248522825539112, 'timestamp': '2025-09-10 02:19:26.943137', 'step': 1653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:26.976637', 'step': 1653, 'epoch': 1} {'type': 'loss', 'content': 0.0142592191696167, 'timestamp': '2025-09-10 02:19:26.982632', 'step': 1654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:27.016068', 'step': 1654, 'epoch': 1} {'type': 'loss', 'content': 0.020718032494187355, 'timestamp': '2025-09-10 02:19:27.022551', 'step': 1655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:27.054261', 'step': 1655, 'epoch': 1} {'type': 'loss', 'content': 0.05304405093193054, 'timestamp': '2025-09-10 02:19:27.081700', 'step': 1656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:19:27.118254', 'step': 1656, 'epoch': 1} {'type': 'loss', 'content': 0.040047433227300644, 'timestamp': '2025-09-10 02:19:27.133381', 'step': 1657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:19:27.174828', 'step': 1657, 'epoch': 1} {'type': 'loss', 'content': 0.02052774466574192, 'timestamp': '2025-09-10 02:19:27.191899', 'step': 1658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:27.225404', 'step': 1658, 'epoch': 1} {'type': 'loss', 'content': 0.013448750600218773, 'timestamp': '2025-09-10 02:19:27.231262', 'step': 1659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:27.262310', 'step': 1659, 'epoch': 1} {'type': 'loss', 'content': 0.005621184129267931, 'timestamp': '2025-09-10 02:19:27.289514', 'step': 1660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:27.320820', 'step': 1660, 'epoch': 1} {'type': 'loss', 'content': 0.020898720249533653, 'timestamp': '2025-09-10 02:19:27.330737', 'step': 1661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:27.362638', 'step': 1661, 'epoch': 1} {'type': 'loss', 'content': 0.017461569979786873, 'timestamp': '2025-09-10 02:19:27.375213', 'step': 1662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 14712978242368}, 'timestamp': '2025-09-10 02:19:27.418839', 'step': 1662, 'epoch': 1} {'type': 'loss', 'content': 0.023927049711346626, 'timestamp': '2025-09-10 02:19:27.436385', 'step': 1663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:27.472070', 'step': 1663, 'epoch': 1} {'type': 'loss', 'content': 0.011457578279078007, 'timestamp': '2025-09-10 02:19:27.505457', 'step': 1664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:27.540203', 'step': 1664, 'epoch': 1} {'type': 'loss', 'content': 0.0047464510425925255, 'timestamp': '2025-09-10 02:19:27.544670', 'step': 1665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:27.577596', 'step': 1665, 'epoch': 1} {'type': 'loss', 'content': 0.03707936406135559, 'timestamp': '2025-09-10 02:19:27.582577', 'step': 1666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:27.614332', 'step': 1666, 'epoch': 1} {'type': 'loss', 'content': 0.042561326175928116, 'timestamp': '2025-09-10 02:19:27.617702', 'step': 1667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:27.650523', 'step': 1667, 'epoch': 1} {'type': 'loss', 'content': 0.005984437186270952, 'timestamp': '2025-09-10 02:19:27.683287', 'step': 1668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:27.714055', 'step': 1668, 'epoch': 1} {'type': 'loss', 'content': 0.011304566636681557, 'timestamp': '2025-09-10 02:19:27.722639', 'step': 1669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:27.754009', 'step': 1669, 'epoch': 1} {'type': 'loss', 'content': 0.0555756576359272, 'timestamp': '2025-09-10 02:19:27.761052', 'step': 1670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:27.791616', 'step': 1670, 'epoch': 1} {'type': 'loss', 'content': 0.014852997846901417, 'timestamp': '2025-09-10 02:19:27.795734', 'step': 1671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:27.826214', 'step': 1671, 'epoch': 1} {'type': 'loss', 'content': 0.000465600925963372, 'timestamp': '2025-09-10 02:19:27.853922', 'step': 1672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:27.884679', 'step': 1672, 'epoch': 1} {'type': 'loss', 'content': 0.011479363776743412, 'timestamp': '2025-09-10 02:19:27.889997', 'step': 1673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:27.921029', 'step': 1673, 'epoch': 1} {'type': 'loss', 'content': 0.00886636320501566, 'timestamp': '2025-09-10 02:19:27.931296', 'step': 1674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:27.962835', 'step': 1674, 'epoch': 1} {'type': 'loss', 'content': 0.033997684717178345, 'timestamp': '2025-09-10 02:19:27.969980', 'step': 1675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:28.011996', 'step': 1675, 'epoch': 1} {'type': 'loss', 'content': 0.028899533674120903, 'timestamp': '2025-09-10 02:19:28.044941', 'step': 1676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:28.075988', 'step': 1676, 'epoch': 1} {'type': 'loss', 'content': 0.015834344550967216, 'timestamp': '2025-09-10 02:19:28.080818', 'step': 1677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:28.126402', 'step': 1677, 'epoch': 1} {'type': 'loss', 'content': 0.0010925616370514035, 'timestamp': '2025-09-10 02:19:28.133192', 'step': 1678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:28.165787', 'step': 1678, 'epoch': 1} {'type': 'loss', 'content': 0.011072367429733276, 'timestamp': '2025-09-10 02:19:28.176200', 'step': 1679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:19:28.215114', 'step': 1679, 'epoch': 1} {'type': 'loss', 'content': 0.014479962177574635, 'timestamp': '2025-09-10 02:19:28.252184', 'step': 1680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:28.283422', 'step': 1680, 'epoch': 1} {'type': 'loss', 'content': 0.005413788836449385, 'timestamp': '2025-09-10 02:19:28.288347', 'step': 1681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:19:28.323315', 'step': 1681, 'epoch': 1} {'type': 'loss', 'content': 0.007339499890804291, 'timestamp': '2025-09-10 02:19:28.337248', 'step': 1682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:28.368533', 'step': 1682, 'epoch': 1} {'type': 'loss', 'content': 0.010168512351810932, 'timestamp': '2025-09-10 02:19:28.373074', 'step': 1683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:19:28.408455', 'step': 1683, 'epoch': 1} {'type': 'loss', 'content': 0.007975684478878975, 'timestamp': '2025-09-10 02:19:28.443073', 'step': 1684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:28.473739', 'step': 1684, 'epoch': 1} {'type': 'loss', 'content': 0.015236958861351013, 'timestamp': '2025-09-10 02:19:28.478172', 'step': 1685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:28.508675', 'step': 1685, 'epoch': 1} {'type': 'loss', 'content': 0.010547908022999763, 'timestamp': '2025-09-10 02:19:28.516263', 'step': 1686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:28.547696', 'step': 1686, 'epoch': 1} {'type': 'loss', 'content': 0.0036612115800380707, 'timestamp': '2025-09-10 02:19:28.554535', 'step': 1687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:19:28.587864', 'step': 1687, 'epoch': 1} {'type': 'loss', 'content': 0.02034112438559532, 'timestamp': '2025-09-10 02:19:28.622149', 'step': 1688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:28.657085', 'step': 1688, 'epoch': 1} {'type': 'loss', 'content': 0.012231721542775631, 'timestamp': '2025-09-10 02:19:28.667076', 'step': 1689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:28.701677', 'step': 1689, 'epoch': 1} {'type': 'loss', 'content': 0.006200658623129129, 'timestamp': '2025-09-10 02:19:28.710829', 'step': 1690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:28.741882', 'step': 1690, 'epoch': 1} {'type': 'loss', 'content': 0.011389417573809624, 'timestamp': '2025-09-10 02:19:28.746123', 'step': 1691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:28.777247', 'step': 1691, 'epoch': 1} {'type': 'loss', 'content': 0.028033211827278137, 'timestamp': '2025-09-10 02:19:28.805545', 'step': 1692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:28.837135', 'step': 1692, 'epoch': 1} {'type': 'loss', 'content': 0.007890121079981327, 'timestamp': '2025-09-10 02:19:28.841907', 'step': 1693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:28.872512', 'step': 1693, 'epoch': 1} {'type': 'loss', 'content': 0.03063378855586052, 'timestamp': '2025-09-10 02:19:28.875117', 'step': 1694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:28.905933', 'step': 1694, 'epoch': 1} {'type': 'loss', 'content': 0.0022948638070374727, 'timestamp': '2025-09-10 02:19:28.916771', 'step': 1695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:28.947133', 'step': 1695, 'epoch': 1} {'type': 'loss', 'content': 0.002465051133185625, 'timestamp': '2025-09-10 02:19:28.978206', 'step': 1696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:29.008980', 'step': 1696, 'epoch': 1} {'type': 'loss', 'content': 0.009558682329952717, 'timestamp': '2025-09-10 02:19:29.018783', 'step': 1697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:29.049904', 'step': 1697, 'epoch': 1} {'type': 'loss', 'content': 0.016664791852235794, 'timestamp': '2025-09-10 02:19:29.057524', 'step': 1698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:19:29.091977', 'step': 1698, 'epoch': 1} {'type': 'loss', 'content': 0.013536560349166393, 'timestamp': '2025-09-10 02:19:29.105628', 'step': 1699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:19:29.144132', 'step': 1699, 'epoch': 1} {'type': 'loss', 'content': 0.008019420318305492, 'timestamp': '2025-09-10 02:19:29.180893', 'step': 1700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:29.210938', 'step': 1700, 'epoch': 1} {'type': 'loss', 'content': 0.010356190614402294, 'timestamp': '2025-09-10 02:19:29.215979', 'step': 1701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:29.256203', 'step': 1701, 'epoch': 1} {'type': 'loss', 'content': 0.016179528087377548, 'timestamp': '2025-09-10 02:19:29.260046', 'step': 1702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:19:29.293405', 'step': 1702, 'epoch': 1} {'type': 'loss', 'content': 0.018332941457629204, 'timestamp': '2025-09-10 02:19:29.306772', 'step': 1703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:29.338414', 'step': 1703, 'epoch': 1} {'type': 'loss', 'content': 0.02060030773282051, 'timestamp': '2025-09-10 02:19:29.369647', 'step': 1704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:29.403513', 'step': 1704, 'epoch': 1} {'type': 'loss', 'content': 0.0144452890381217, 'timestamp': '2025-09-10 02:19:29.411462', 'step': 1705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:29.442717', 'step': 1705, 'epoch': 1} {'type': 'loss', 'content': 0.016921203583478928, 'timestamp': '2025-09-10 02:19:29.449999', 'step': 1706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:29.480120', 'step': 1706, 'epoch': 1} {'type': 'loss', 'content': 0.03076860122382641, 'timestamp': '2025-09-10 02:19:29.486961', 'step': 1707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:29.518046', 'step': 1707, 'epoch': 1} {'type': 'loss', 'content': 0.010674857534468174, 'timestamp': '2025-09-10 02:19:29.551498', 'step': 1708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:29.582268', 'step': 1708, 'epoch': 1} {'type': 'loss', 'content': 0.004811963532119989, 'timestamp': '2025-09-10 02:19:29.587283', 'step': 1709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:29.617891', 'step': 1709, 'epoch': 1} {'type': 'loss', 'content': 0.02059813216328621, 'timestamp': '2025-09-10 02:19:29.628103', 'step': 1710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:29.661190', 'step': 1710, 'epoch': 1} {'type': 'loss', 'content': 0.04477255791425705, 'timestamp': '2025-09-10 02:19:29.668702', 'step': 1711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:29.699404', 'step': 1711, 'epoch': 1} {'type': 'loss', 'content': 0.022556250914931297, 'timestamp': '2025-09-10 02:19:29.727279', 'step': 1712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:19:29.760289', 'step': 1712, 'epoch': 1} {'type': 'loss', 'content': 0.014114036224782467, 'timestamp': '2025-09-10 02:19:29.773276', 'step': 1713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:29.803259', 'step': 1713, 'epoch': 1} {'type': 'loss', 'content': 0.02027253620326519, 'timestamp': '2025-09-10 02:19:29.810376', 'step': 1714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:29.840408', 'step': 1714, 'epoch': 1} {'type': 'loss', 'content': 0.004980639088898897, 'timestamp': '2025-09-10 02:19:29.844370', 'step': 1715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:29.877268', 'step': 1715, 'epoch': 1} {'type': 'loss', 'content': 0.01874137483537197, 'timestamp': '2025-09-10 02:19:29.905832', 'step': 1716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:29.936597', 'step': 1716, 'epoch': 1} {'type': 'loss', 'content': 0.00487111508846283, 'timestamp': '2025-09-10 02:19:29.938607', 'step': 1717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 15662185694400}, 'timestamp': '2025-09-10 02:19:29.984718', 'step': 1717, 'epoch': 1} {'type': 'loss', 'content': 0.008420931175351143, 'timestamp': '2025-09-10 02:19:30.003909', 'step': 1718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:30.034431', 'step': 1718, 'epoch': 1} {'type': 'loss', 'content': 0.00927684735506773, 'timestamp': '2025-09-10 02:19:30.041409', 'step': 1719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:30.074103', 'step': 1719, 'epoch': 1} {'type': 'loss', 'content': 0.00768580287694931, 'timestamp': '2025-09-10 02:19:30.107605', 'step': 1720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:19:30.137477', 'step': 1720, 'epoch': 1} {'type': 'loss', 'content': 0.01326842326670885, 'timestamp': '2025-09-10 02:19:30.139625', 'step': 1721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:30.171634', 'step': 1721, 'epoch': 1} {'type': 'loss', 'content': 0.004389611072838306, 'timestamp': '2025-09-10 02:19:30.182588', 'step': 1722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:30.213265', 'step': 1722, 'epoch': 1} {'type': 'loss', 'content': 0.020359130576252937, 'timestamp': '2025-09-10 02:19:30.217724', 'step': 1723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:19:30.253023', 'step': 1723, 'epoch': 1} {'type': 'loss', 'content': 0.019980600103735924, 'timestamp': '2025-09-10 02:19:30.287976', 'step': 1724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:30.319961', 'step': 1724, 'epoch': 1} {'type': 'loss', 'content': 0.012974241748452187, 'timestamp': '2025-09-10 02:19:30.327257', 'step': 1725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:30.359348', 'step': 1725, 'epoch': 1} {'type': 'loss', 'content': 0.012521286495029926, 'timestamp': '2025-09-10 02:19:30.367171', 'step': 1726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:19:30.401011', 'step': 1726, 'epoch': 1} {'type': 'loss', 'content': 0.01130104623734951, 'timestamp': '2025-09-10 02:19:30.414735', 'step': 1727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:30.446730', 'step': 1727, 'epoch': 1} {'type': 'loss', 'content': 0.032924991101026535, 'timestamp': '2025-09-10 02:19:30.475325', 'step': 1728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:30.507565', 'step': 1728, 'epoch': 1} {'type': 'loss', 'content': 0.01869148574769497, 'timestamp': '2025-09-10 02:19:30.511832', 'step': 1729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:30.542177', 'step': 1729, 'epoch': 1} {'type': 'loss', 'content': 0.003208654234185815, 'timestamp': '2025-09-10 02:19:30.544625', 'step': 1730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:30.574883', 'step': 1730, 'epoch': 1} {'type': 'loss', 'content': 0.008543224073946476, 'timestamp': '2025-09-10 02:19:30.577567', 'step': 1731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:30.608550', 'step': 1731, 'epoch': 1} {'type': 'loss', 'content': 0.009434954263269901, 'timestamp': '2025-09-10 02:19:30.636421', 'step': 1732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:19:30.673056', 'step': 1732, 'epoch': 1} {'type': 'loss', 'content': 0.05541825294494629, 'timestamp': '2025-09-10 02:19:30.688495', 'step': 1733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:30.719019', 'step': 1733, 'epoch': 1} {'type': 'loss', 'content': 0.0067622484639286995, 'timestamp': '2025-09-10 02:19:30.723483', 'step': 1734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:30.754703', 'step': 1734, 'epoch': 1} {'type': 'loss', 'content': 0.015380342490971088, 'timestamp': '2025-09-10 02:19:30.762473', 'step': 1735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:30.792914', 'step': 1735, 'epoch': 1} {'type': 'loss', 'content': 0.017134329304099083, 'timestamp': '2025-09-10 02:19:30.821094', 'step': 1736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:30.851110', 'step': 1736, 'epoch': 1} {'type': 'loss', 'content': 0.03203447908163071, 'timestamp': '2025-09-10 02:19:30.853371', 'step': 1737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:30.892062', 'step': 1737, 'epoch': 1} {'type': 'loss', 'content': 0.003379482077434659, 'timestamp': '2025-09-10 02:19:30.898984', 'step': 1738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:19:30.939908', 'step': 1738, 'epoch': 1} {'type': 'loss', 'content': 0.011437847279012203, 'timestamp': '2025-09-10 02:19:30.953293', 'step': 1739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:30.985412', 'step': 1739, 'epoch': 1} {'type': 'loss', 'content': 0.004656031262129545, 'timestamp': '2025-09-10 02:19:31.014134', 'step': 1740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:31.045209', 'step': 1740, 'epoch': 1} {'type': 'loss', 'content': 0.003976056352257729, 'timestamp': '2025-09-10 02:19:31.050582', 'step': 1741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:31.083172', 'step': 1741, 'epoch': 1} {'type': 'loss', 'content': 0.004918371327221394, 'timestamp': '2025-09-10 02:19:31.087586', 'step': 1742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 18035204324480}, 'timestamp': '2025-09-10 02:19:31.141165', 'step': 1742, 'epoch': 1} {'type': 'loss', 'content': 0.026369964703917503, 'timestamp': '2025-09-10 02:19:31.162721', 'step': 1743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:31.193380', 'step': 1743, 'epoch': 1} {'type': 'loss', 'content': 0.0038020031061023474, 'timestamp': '2025-09-10 02:19:31.217175', 'step': 1744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:31.247831', 'step': 1744, 'epoch': 1} {'type': 'loss', 'content': 0.0021418523974716663, 'timestamp': '2025-09-10 02:19:31.257520', 'step': 1745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:31.288992', 'step': 1745, 'epoch': 1} {'type': 'loss', 'content': 0.00539687043055892, 'timestamp': '2025-09-10 02:19:31.292992', 'step': 1746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:31.323652', 'step': 1746, 'epoch': 1} {'type': 'loss', 'content': 0.003791423747316003, 'timestamp': '2025-09-10 02:19:31.327975', 'step': 1747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:31.358015', 'step': 1747, 'epoch': 1} {'type': 'loss', 'content': 0.005174871999770403, 'timestamp': '2025-09-10 02:19:31.381555', 'step': 1748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:31.413188', 'step': 1748, 'epoch': 1} {'type': 'loss', 'content': 0.009157408960163593, 'timestamp': '2025-09-10 02:19:31.415329', 'step': 1749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:31.445518', 'step': 1749, 'epoch': 1} {'type': 'loss', 'content': 0.012222186662256718, 'timestamp': '2025-09-10 02:19:31.448319', 'step': 1750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 13289167064320}, 'timestamp': '2025-09-10 02:19:31.487613', 'step': 1750, 'epoch': 1} {'type': 'loss', 'content': 0.013511120341718197, 'timestamp': '2025-09-10 02:19:31.503936', 'step': 1751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:19:31.537947', 'step': 1751, 'epoch': 1} {'type': 'loss', 'content': 0.018908429890871048, 'timestamp': '2025-09-10 02:19:31.572494', 'step': 1752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:31.603343', 'step': 1752, 'epoch': 1} {'type': 'loss', 'content': 0.00666068447753787, 'timestamp': '2025-09-10 02:19:31.611216', 'step': 1753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:31.642078', 'step': 1753, 'epoch': 1} {'type': 'loss', 'content': 0.012360441498458385, 'timestamp': '2025-09-10 02:19:31.652141', 'step': 1754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:31.684148', 'step': 1754, 'epoch': 1} {'type': 'loss', 'content': 0.003066555829718709, 'timestamp': '2025-09-10 02:19:31.691911', 'step': 1755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:31.722517', 'step': 1755, 'epoch': 1} {'type': 'loss', 'content': 0.02608044445514679, 'timestamp': '2025-09-10 02:19:31.750820', 'step': 1756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:31.781433', 'step': 1756, 'epoch': 1} {'type': 'loss', 'content': 0.0021821721456944942, 'timestamp': '2025-09-10 02:19:31.786006', 'step': 1757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:31.817734', 'step': 1757, 'epoch': 1} {'type': 'loss', 'content': 0.019431469962000847, 'timestamp': '2025-09-10 02:19:31.821666', 'step': 1758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:31.851856', 'step': 1758, 'epoch': 1} {'type': 'loss', 'content': 0.008259564638137817, 'timestamp': '2025-09-10 02:19:31.856444', 'step': 1759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:31.886455', 'step': 1759, 'epoch': 1} {'type': 'loss', 'content': 0.00771870044991374, 'timestamp': '2025-09-10 02:19:31.918190', 'step': 1760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:31.949685', 'step': 1760, 'epoch': 1} {'type': 'loss', 'content': 0.007215961813926697, 'timestamp': '2025-09-10 02:19:31.952016', 'step': 1761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:31.983238', 'step': 1761, 'epoch': 1} {'type': 'loss', 'content': 0.0035610010381788015, 'timestamp': '2025-09-10 02:19:31.995161', 'step': 1762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:32.026241', 'step': 1762, 'epoch': 1} {'type': 'loss', 'content': 0.011534550227224827, 'timestamp': '2025-09-10 02:19:32.033825', 'step': 1763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:32.065156', 'step': 1763, 'epoch': 1} {'type': 'loss', 'content': 0.009490884840488434, 'timestamp': '2025-09-10 02:19:32.096112', 'step': 1764, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:19:42.252845', 'step': 1764, 'epoch': 1} {'type': 'pplx', 'content': 13646644.047763163, 'timestamp': '2025-09-10 02:19:42.255566', 'step': 1764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:42.285893', 'step': 1764, 'epoch': 1} {'type': 'loss', 'content': 0.009771243669092655, 'timestamp': '2025-09-10 02:19:42.288021', 'step': 1765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:42.319947', 'step': 1765, 'epoch': 1} {'type': 'loss', 'content': 0.0313577726483345, 'timestamp': '2025-09-10 02:19:42.326528', 'step': 1766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:42.358434', 'step': 1766, 'epoch': 1} {'type': 'loss', 'content': 0.014947721734642982, 'timestamp': '2025-09-10 02:19:42.368142', 'step': 1767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:42.399916', 'step': 1767, 'epoch': 1} {'type': 'loss', 'content': 0.006268322933465242, 'timestamp': '2025-09-10 02:19:42.424984', 'step': 1768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:42.457111', 'step': 1768, 'epoch': 1} {'type': 'loss', 'content': 0.0015659164637327194, 'timestamp': '2025-09-10 02:19:42.461865', 'step': 1769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:42.493385', 'step': 1769, 'epoch': 1} {'type': 'loss', 'content': 0.023305343464016914, 'timestamp': '2025-09-10 02:19:42.500141', 'step': 1770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:42.533254', 'step': 1770, 'epoch': 1} {'type': 'loss', 'content': 0.027237599715590477, 'timestamp': '2025-09-10 02:19:42.540707', 'step': 1771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:42.572132', 'step': 1771, 'epoch': 1} {'type': 'loss', 'content': 0.0052889627404510975, 'timestamp': '2025-09-10 02:19:42.600096', 'step': 1772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:42.633366', 'step': 1772, 'epoch': 1} {'type': 'loss', 'content': 0.008854638785123825, 'timestamp': '2025-09-10 02:19:42.642834', 'step': 1773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:42.673736', 'step': 1773, 'epoch': 1} {'type': 'loss', 'content': 0.0037044784985482693, 'timestamp': '2025-09-10 02:19:42.680657', 'step': 1774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:42.712060', 'step': 1774, 'epoch': 1} {'type': 'loss', 'content': 0.0077804699540138245, 'timestamp': '2025-09-10 02:19:42.719552', 'step': 1775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:42.750994', 'step': 1775, 'epoch': 1} {'type': 'loss', 'content': 0.004913232754915953, 'timestamp': '2025-09-10 02:19:42.778581', 'step': 1776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:19:42.811582', 'step': 1776, 'epoch': 1} {'type': 'loss', 'content': 0.051165949553251266, 'timestamp': '2025-09-10 02:19:42.824296', 'step': 1777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:42.856365', 'step': 1777, 'epoch': 1} {'type': 'loss', 'content': 0.004899430554360151, 'timestamp': '2025-09-10 02:19:42.867271', 'step': 1778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:42.897553', 'step': 1778, 'epoch': 1} {'type': 'loss', 'content': 0.05997491627931595, 'timestamp': '2025-09-10 02:19:42.900109', 'step': 1779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:42.931479', 'step': 1779, 'epoch': 1} {'type': 'loss', 'content': 0.031111031770706177, 'timestamp': '2025-09-10 02:19:42.963258', 'step': 1780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:19:42.996526', 'step': 1780, 'epoch': 1} {'type': 'loss', 'content': 0.01209025364369154, 'timestamp': '2025-09-10 02:19:43.009852', 'step': 1781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:43.042551', 'step': 1781, 'epoch': 1} {'type': 'loss', 'content': 0.004403825383633375, 'timestamp': '2025-09-10 02:19:43.053498', 'step': 1782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:43.085270', 'step': 1782, 'epoch': 1} {'type': 'loss', 'content': 0.022716468200087547, 'timestamp': '2025-09-10 02:19:43.089510', 'step': 1783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:19:43.127488', 'step': 1783, 'epoch': 1} {'type': 'loss', 'content': 0.04864273592829704, 'timestamp': '2025-09-10 02:19:43.164051', 'step': 1784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:43.195469', 'step': 1784, 'epoch': 1} {'type': 'loss', 'content': 0.015554594807326794, 'timestamp': '2025-09-10 02:19:43.203965', 'step': 1785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:43.234101', 'step': 1785, 'epoch': 1} {'type': 'loss', 'content': 0.005889651831239462, 'timestamp': '2025-09-10 02:19:43.236636', 'step': 1786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:43.267481', 'step': 1786, 'epoch': 1} {'type': 'loss', 'content': 0.009219110012054443, 'timestamp': '2025-09-10 02:19:43.274228', 'step': 1787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:19:43.305143', 'step': 1787, 'epoch': 1} {'type': 'loss', 'content': 0.011448384262621403, 'timestamp': '2025-09-10 02:19:43.328394', 'step': 1788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:43.358848', 'step': 1788, 'epoch': 1} {'type': 'loss', 'content': 0.006459720432758331, 'timestamp': '2025-09-10 02:19:43.363268', 'step': 1789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:43.395200', 'step': 1789, 'epoch': 1} {'type': 'loss', 'content': 0.007149911485612392, 'timestamp': '2025-09-10 02:19:43.398808', 'step': 1790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:19:43.437204', 'step': 1790, 'epoch': 1} {'type': 'loss', 'content': 0.008804053999483585, 'timestamp': '2025-09-10 02:19:43.452810', 'step': 1791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:43.484768', 'step': 1791, 'epoch': 1} {'type': 'loss', 'content': 0.008087508380413055, 'timestamp': '2025-09-10 02:19:43.515258', 'step': 1792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:19:43.549534', 'step': 1792, 'epoch': 1} {'type': 'loss', 'content': 0.02685811184346676, 'timestamp': '2025-09-10 02:19:43.562840', 'step': 1793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:43.595065', 'step': 1793, 'epoch': 1} {'type': 'loss', 'content': 0.007293777074664831, 'timestamp': '2025-09-10 02:19:43.602126', 'step': 1794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:43.635093', 'step': 1794, 'epoch': 1} {'type': 'loss', 'content': 0.011456483043730259, 'timestamp': '2025-09-10 02:19:43.639291', 'step': 1795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:43.672132', 'step': 1795, 'epoch': 1} {'type': 'loss', 'content': 0.02691769227385521, 'timestamp': '2025-09-10 02:19:43.696135', 'step': 1796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:43.727428', 'step': 1796, 'epoch': 1} {'type': 'loss', 'content': 0.0008614645921625197, 'timestamp': '2025-09-10 02:19:43.729755', 'step': 1797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:43.761349', 'step': 1797, 'epoch': 1} {'type': 'loss', 'content': 0.01995791494846344, 'timestamp': '2025-09-10 02:19:43.768894', 'step': 1798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:43.799930', 'step': 1798, 'epoch': 1} {'type': 'loss', 'content': 0.013489159755408764, 'timestamp': '2025-09-10 02:19:43.806777', 'step': 1799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:43.837880', 'step': 1799, 'epoch': 1} {'type': 'loss', 'content': 0.014494777657091618, 'timestamp': '2025-09-10 02:19:43.870637', 'step': 1800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:19:43.903188', 'step': 1800, 'epoch': 1} {'type': 'loss', 'content': 0.02528318762779236, 'timestamp': '2025-09-10 02:19:43.915834', 'step': 1801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:19:43.949804', 'step': 1801, 'epoch': 1} {'type': 'loss', 'content': 0.0043422463349998, 'timestamp': '2025-09-10 02:19:43.963175', 'step': 1802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:43.994251', 'step': 1802, 'epoch': 1} {'type': 'loss', 'content': 0.0019023737404495478, 'timestamp': '2025-09-10 02:19:43.996710', 'step': 1803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:44.027467', 'step': 1803, 'epoch': 1} {'type': 'loss', 'content': 0.01238183956593275, 'timestamp': '2025-09-10 02:19:44.052681', 'step': 1804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:44.083781', 'step': 1804, 'epoch': 1} {'type': 'loss', 'content': 0.01738804019987583, 'timestamp': '2025-09-10 02:19:44.086049', 'step': 1805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:44.117517', 'step': 1805, 'epoch': 1} {'type': 'loss', 'content': 0.0023342971689999104, 'timestamp': '2025-09-10 02:19:44.125013', 'step': 1806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:19:44.160189', 'step': 1806, 'epoch': 1} {'type': 'loss', 'content': 0.005149137694388628, 'timestamp': '2025-09-10 02:19:44.173597', 'step': 1807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:44.204211', 'step': 1807, 'epoch': 1} {'type': 'loss', 'content': 0.0011267533991485834, 'timestamp': '2025-09-10 02:19:44.229782', 'step': 1808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:44.261247', 'step': 1808, 'epoch': 1} {'type': 'loss', 'content': 0.022445213049650192, 'timestamp': '2025-09-10 02:19:44.269737', 'step': 1809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:44.301470', 'step': 1809, 'epoch': 1} {'type': 'loss', 'content': 0.034575022757053375, 'timestamp': '2025-09-10 02:19:44.311176', 'step': 1810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:44.342260', 'step': 1810, 'epoch': 1} {'type': 'loss', 'content': 0.002625245600938797, 'timestamp': '2025-09-10 02:19:44.344711', 'step': 1811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:44.376014', 'step': 1811, 'epoch': 1} {'type': 'loss', 'content': 0.00602992856875062, 'timestamp': '2025-09-10 02:19:44.408890', 'step': 1812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:44.440634', 'step': 1812, 'epoch': 1} {'type': 'loss', 'content': 0.003411894431337714, 'timestamp': '2025-09-10 02:19:44.444840', 'step': 1813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:44.476205', 'step': 1813, 'epoch': 1} {'type': 'loss', 'content': 0.015310808084905148, 'timestamp': '2025-09-10 02:19:44.486556', 'step': 1814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:44.517642', 'step': 1814, 'epoch': 1} {'type': 'loss', 'content': 0.005168873351067305, 'timestamp': '2025-09-10 02:19:44.529701', 'step': 1815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:19:44.562139', 'step': 1815, 'epoch': 1} {'type': 'loss', 'content': 0.008812850341200829, 'timestamp': '2025-09-10 02:19:44.585695', 'step': 1816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:19:44.618878', 'step': 1816, 'epoch': 1} {'type': 'loss', 'content': 0.014714348129928112, 'timestamp': '2025-09-10 02:19:44.631876', 'step': 1817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:44.664299', 'step': 1817, 'epoch': 1} {'type': 'loss', 'content': 0.012252910062670708, 'timestamp': '2025-09-10 02:19:44.674674', 'step': 1818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:44.705225', 'step': 1818, 'epoch': 1} {'type': 'loss', 'content': 0.03599643334746361, 'timestamp': '2025-09-10 02:19:44.712363', 'step': 1819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:44.743233', 'step': 1819, 'epoch': 1} {'type': 'loss', 'content': 0.011372431181371212, 'timestamp': '2025-09-10 02:19:44.775022', 'step': 1820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:44.806401', 'step': 1820, 'epoch': 1} {'type': 'loss', 'content': 0.002817036584019661, 'timestamp': '2025-09-10 02:19:44.810885', 'step': 1821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:19:44.845983', 'step': 1821, 'epoch': 1} {'type': 'loss', 'content': 0.007789141498506069, 'timestamp': '2025-09-10 02:19:44.860026', 'step': 1822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:44.892057', 'step': 1822, 'epoch': 1} {'type': 'loss', 'content': 0.02494768425822258, 'timestamp': '2025-09-10 02:19:44.898761', 'step': 1823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:44.931244', 'step': 1823, 'epoch': 1} {'type': 'loss', 'content': 0.028473839163780212, 'timestamp': '2025-09-10 02:19:44.964020', 'step': 1824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:44.996431', 'step': 1824, 'epoch': 1} {'type': 'loss', 'content': 0.028283346444368362, 'timestamp': '2025-09-10 02:19:45.000490', 'step': 1825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:45.031640', 'step': 1825, 'epoch': 1} {'type': 'loss', 'content': 0.03897113725543022, 'timestamp': '2025-09-10 02:19:45.039246', 'step': 1826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:45.071557', 'step': 1826, 'epoch': 1} {'type': 'loss', 'content': 0.018188832327723503, 'timestamp': '2025-09-10 02:19:45.078189', 'step': 1827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:19:45.109791', 'step': 1827, 'epoch': 1} {'type': 'loss', 'content': 0.03504600375890732, 'timestamp': '2025-09-10 02:19:45.133580', 'step': 1828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:45.165973', 'step': 1828, 'epoch': 1} {'type': 'loss', 'content': 0.01439552940428257, 'timestamp': '2025-09-10 02:19:45.170654', 'step': 1829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:45.202156', 'step': 1829, 'epoch': 1} {'type': 'loss', 'content': 0.0034188800491392612, 'timestamp': '2025-09-10 02:19:45.212188', 'step': 1830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:45.242754', 'step': 1830, 'epoch': 1} {'type': 'loss', 'content': 0.0018745275447145104, 'timestamp': '2025-09-10 02:19:45.249485', 'step': 1831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:45.281073', 'step': 1831, 'epoch': 1} {'type': 'loss', 'content': 0.01610477827489376, 'timestamp': '2025-09-10 02:19:45.306446', 'step': 1832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:45.337756', 'step': 1832, 'epoch': 1} {'type': 'loss', 'content': 0.00925888679921627, 'timestamp': '2025-09-10 02:19:45.340062', 'step': 1833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:45.372125', 'step': 1833, 'epoch': 1} {'type': 'loss', 'content': 0.019423970952630043, 'timestamp': '2025-09-10 02:19:45.379888', 'step': 1834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:45.412255', 'step': 1834, 'epoch': 1} {'type': 'loss', 'content': 0.004022897686809301, 'timestamp': '2025-09-10 02:19:45.419864', 'step': 1835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:45.451179', 'step': 1835, 'epoch': 1} {'type': 'loss', 'content': 0.05194368213415146, 'timestamp': '2025-09-10 02:19:45.482178', 'step': 1836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:45.514061', 'step': 1836, 'epoch': 1} {'type': 'loss', 'content': 0.02164938487112522, 'timestamp': '2025-09-10 02:19:45.518937', 'step': 1837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:45.549764', 'step': 1837, 'epoch': 1} {'type': 'loss', 'content': 0.006102901417762041, 'timestamp': '2025-09-10 02:19:45.561812', 'step': 1838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:45.592699', 'step': 1838, 'epoch': 1} {'type': 'loss', 'content': 0.029408836737275124, 'timestamp': '2025-09-10 02:19:45.599380', 'step': 1839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:45.631255', 'step': 1839, 'epoch': 1} {'type': 'loss', 'content': 0.002608294365927577, 'timestamp': '2025-09-10 02:19:45.663366', 'step': 1840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:45.695272', 'step': 1840, 'epoch': 1} {'type': 'loss', 'content': 0.007881422527134418, 'timestamp': '2025-09-10 02:19:45.697541', 'step': 1841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:45.728912', 'step': 1841, 'epoch': 1} {'type': 'loss', 'content': 0.0055758110247552395, 'timestamp': '2025-09-10 02:19:45.736321', 'step': 1842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:45.767790', 'step': 1842, 'epoch': 1} {'type': 'loss', 'content': 0.010343975387513638, 'timestamp': '2025-09-10 02:19:45.775320', 'step': 1843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:45.806451', 'step': 1843, 'epoch': 1} {'type': 'loss', 'content': 0.011013594456017017, 'timestamp': '2025-09-10 02:19:45.835059', 'step': 1844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:45.866731', 'step': 1844, 'epoch': 1} {'type': 'loss', 'content': 0.014858272857964039, 'timestamp': '2025-09-10 02:19:45.871140', 'step': 1845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:45.903377', 'step': 1845, 'epoch': 1} {'type': 'loss', 'content': 0.016053643077611923, 'timestamp': '2025-09-10 02:19:45.910764', 'step': 1846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:45.943260', 'step': 1846, 'epoch': 1} {'type': 'loss', 'content': 0.0070062256418168545, 'timestamp': '2025-09-10 02:19:45.953665', 'step': 1847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:19:45.989305', 'step': 1847, 'epoch': 1} {'type': 'loss', 'content': 0.005651051644235849, 'timestamp': '2025-09-10 02:19:46.023820', 'step': 1848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:46.055949', 'step': 1848, 'epoch': 1} {'type': 'loss', 'content': 0.022787367925047874, 'timestamp': '2025-09-10 02:19:46.060181', 'step': 1849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:46.090933', 'step': 1849, 'epoch': 1} {'type': 'loss', 'content': 0.016836825758218765, 'timestamp': '2025-09-10 02:19:46.098375', 'step': 1850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:46.128908', 'step': 1850, 'epoch': 1} {'type': 'loss', 'content': 0.022327521815896034, 'timestamp': '2025-09-10 02:19:46.136274', 'step': 1851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:19:46.174803', 'step': 1851, 'epoch': 1} {'type': 'loss', 'content': 0.016063082963228226, 'timestamp': '2025-09-10 02:19:46.211563', 'step': 1852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 848], 'flops': 25154260214720}, 'timestamp': '2025-09-10 02:19:46.280708', 'step': 1852, 'epoch': 1} {'type': 'loss', 'content': 0.0024673263542354107, 'timestamp': '2025-09-10 02:19:46.310219', 'step': 1853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:46.346698', 'step': 1853, 'epoch': 1} {'type': 'loss', 'content': 0.004926327615976334, 'timestamp': '2025-09-10 02:19:46.359274', 'step': 1854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:46.391250', 'step': 1854, 'epoch': 1} {'type': 'loss', 'content': 0.011958016082644463, 'timestamp': '2025-09-10 02:19:46.395459', 'step': 1855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:46.427033', 'step': 1855, 'epoch': 1} {'type': 'loss', 'content': 0.007356296759098768, 'timestamp': '2025-09-10 02:19:46.452303', 'step': 1856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:46.483238', 'step': 1856, 'epoch': 1} {'type': 'loss', 'content': 0.010786294937133789, 'timestamp': '2025-09-10 02:19:46.491004', 'step': 1857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:46.522074', 'step': 1857, 'epoch': 1} {'type': 'loss', 'content': 0.013343775644898415, 'timestamp': '2025-09-10 02:19:46.528769', 'step': 1858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:19:46.563654', 'step': 1858, 'epoch': 1} {'type': 'loss', 'content': 0.022976329550147057, 'timestamp': '2025-09-10 02:19:46.577360', 'step': 1859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:19:46.612402', 'step': 1859, 'epoch': 1} {'type': 'loss', 'content': 0.023406516760587692, 'timestamp': '2025-09-10 02:19:46.647250', 'step': 1860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:46.677341', 'step': 1860, 'epoch': 1} {'type': 'loss', 'content': 0.009630659595131874, 'timestamp': '2025-09-10 02:19:46.679555', 'step': 1861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 15187581968384}, 'timestamp': '2025-09-10 02:19:46.721859', 'step': 1861, 'epoch': 1} {'type': 'loss', 'content': 0.010469197295606136, 'timestamp': '2025-09-10 02:19:46.739610', 'step': 1862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:19:46.779453', 'step': 1862, 'epoch': 1} {'type': 'loss', 'content': 0.004274784587323666, 'timestamp': '2025-09-10 02:19:46.795659', 'step': 1863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:46.826679', 'step': 1863, 'epoch': 1} {'type': 'loss', 'content': 0.020305419340729713, 'timestamp': '2025-09-10 02:19:46.851960', 'step': 1864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:46.882972', 'step': 1864, 'epoch': 1} {'type': 'loss', 'content': 0.01911826804280281, 'timestamp': '2025-09-10 02:19:46.893468', 'step': 1865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:46.924922', 'step': 1865, 'epoch': 1} {'type': 'loss', 'content': 0.006116253789514303, 'timestamp': '2025-09-10 02:19:46.932746', 'step': 1866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:46.963830', 'step': 1866, 'epoch': 1} {'type': 'loss', 'content': 0.03426285460591316, 'timestamp': '2025-09-10 02:19:46.974691', 'step': 1867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:47.005402', 'step': 1867, 'epoch': 1} {'type': 'loss', 'content': 0.01511828787624836, 'timestamp': '2025-09-10 02:19:47.037027', 'step': 1868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:47.067742', 'step': 1868, 'epoch': 1} {'type': 'loss', 'content': 0.03478477522730827, 'timestamp': '2025-09-10 02:19:47.072398', 'step': 1869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:19:47.105643', 'step': 1869, 'epoch': 1} {'type': 'loss', 'content': 0.0066609373316168785, 'timestamp': '2025-09-10 02:19:47.119012', 'step': 1870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:47.149847', 'step': 1870, 'epoch': 1} {'type': 'loss', 'content': 0.009698964655399323, 'timestamp': '2025-09-10 02:19:47.156680', 'step': 1871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:47.187143', 'step': 1871, 'epoch': 1} {'type': 'loss', 'content': 0.027642009779810905, 'timestamp': '2025-09-10 02:19:47.218357', 'step': 1872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:47.248765', 'step': 1872, 'epoch': 1} {'type': 'loss', 'content': 0.006270275916904211, 'timestamp': '2025-09-10 02:19:47.253531', 'step': 1873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:19:47.283393', 'step': 1873, 'epoch': 1} {'type': 'loss', 'content': 0.0051859780214726925, 'timestamp': '2025-09-10 02:19:47.285402', 'step': 1874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:19:47.323399', 'step': 1874, 'epoch': 1} {'type': 'loss', 'content': 0.013219809159636497, 'timestamp': '2025-09-10 02:19:47.338950', 'step': 1875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:47.369922', 'step': 1875, 'epoch': 1} {'type': 'loss', 'content': 0.00313380965963006, 'timestamp': '2025-09-10 02:19:47.395333', 'step': 1876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:47.426344', 'step': 1876, 'epoch': 1} {'type': 'loss', 'content': 0.017517106607556343, 'timestamp': '2025-09-10 02:19:47.436727', 'step': 1877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:47.468272', 'step': 1877, 'epoch': 1} {'type': 'loss', 'content': 0.04642176628112793, 'timestamp': '2025-09-10 02:19:47.478627', 'step': 1878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:47.509259', 'step': 1878, 'epoch': 1} {'type': 'loss', 'content': 0.015917208045721054, 'timestamp': '2025-09-10 02:19:47.516252', 'step': 1879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:19:47.553668', 'step': 1879, 'epoch': 1} {'type': 'loss', 'content': 0.01571694202721119, 'timestamp': '2025-09-10 02:19:47.590183', 'step': 1880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:47.621751', 'step': 1880, 'epoch': 1} {'type': 'loss', 'content': 0.007821121253073215, 'timestamp': '2025-09-10 02:19:47.626878', 'step': 1881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:47.658608', 'step': 1881, 'epoch': 1} {'type': 'loss', 'content': 0.024149566888809204, 'timestamp': '2025-09-10 02:19:47.666057', 'step': 1882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:19:47.702178', 'step': 1882, 'epoch': 1} {'type': 'loss', 'content': 0.00780960638076067, 'timestamp': '2025-09-10 02:19:47.715829', 'step': 1883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:47.747249', 'step': 1883, 'epoch': 1} {'type': 'loss', 'content': 0.009634777903556824, 'timestamp': '2025-09-10 02:19:47.775606', 'step': 1884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:47.806891', 'step': 1884, 'epoch': 1} {'type': 'loss', 'content': 0.004729505628347397, 'timestamp': '2025-09-10 02:19:47.811470', 'step': 1885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:47.843086', 'step': 1885, 'epoch': 1} {'type': 'loss', 'content': 0.028273126110434532, 'timestamp': '2025-09-10 02:19:47.855626', 'step': 1886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:47.887268', 'step': 1886, 'epoch': 1} {'type': 'loss', 'content': 0.0056550041772425175, 'timestamp': '2025-09-10 02:19:47.894317', 'step': 1887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:47.925933', 'step': 1887, 'epoch': 1} {'type': 'loss', 'content': 0.0050589581951498985, 'timestamp': '2025-09-10 02:19:47.954221', 'step': 1888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:47.984999', 'step': 1888, 'epoch': 1} {'type': 'loss', 'content': 0.01905803009867668, 'timestamp': '2025-09-10 02:19:47.990440', 'step': 1889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:48.023540', 'step': 1889, 'epoch': 1} {'type': 'loss', 'content': 0.011093123815953732, 'timestamp': '2025-09-10 02:19:48.030626', 'step': 1890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:48.062235', 'step': 1890, 'epoch': 1} {'type': 'loss', 'content': 0.006900664884597063, 'timestamp': '2025-09-10 02:19:48.070068', 'step': 1891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:48.101187', 'step': 1891, 'epoch': 1} {'type': 'loss', 'content': 0.006212836597114801, 'timestamp': '2025-09-10 02:19:48.128957', 'step': 1892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:48.160628', 'step': 1892, 'epoch': 1} {'type': 'loss', 'content': 0.004362097941339016, 'timestamp': '2025-09-10 02:19:48.165615', 'step': 1893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 19459015502528}, 'timestamp': '2025-09-10 02:19:48.221178', 'step': 1893, 'epoch': 1} {'type': 'loss', 'content': 0.012452795170247555, 'timestamp': '2025-09-10 02:19:48.244554', 'step': 1894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:19:48.276367', 'step': 1894, 'epoch': 1} {'type': 'loss', 'content': 0.01033748872578144, 'timestamp': '2025-09-10 02:19:48.287260', 'step': 1895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:48.318079', 'step': 1895, 'epoch': 1} {'type': 'loss', 'content': 0.016262022778391838, 'timestamp': '2025-09-10 02:19:48.345998', 'step': 1896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:19:48.382681', 'step': 1896, 'epoch': 1} {'type': 'loss', 'content': 0.009804087691009045, 'timestamp': '2025-09-10 02:19:48.398120', 'step': 1897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:48.429228', 'step': 1897, 'epoch': 1} {'type': 'loss', 'content': 0.0022602048702538013, 'timestamp': '2025-09-10 02:19:48.436055', 'step': 1898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:48.472753', 'step': 1898, 'epoch': 1} {'type': 'loss', 'content': 0.0055809845216572285, 'timestamp': '2025-09-10 02:19:48.480563', 'step': 1899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:48.518880', 'step': 1899, 'epoch': 1} {'type': 'loss', 'content': 0.007991933263838291, 'timestamp': '2025-09-10 02:19:48.551926', 'step': 1900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:19:48.588105', 'step': 1900, 'epoch': 1} {'type': 'loss', 'content': 0.006761971395462751, 'timestamp': '2025-09-10 02:19:48.601170', 'step': 1901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:48.633355', 'step': 1901, 'epoch': 1} {'type': 'loss', 'content': 0.002031755167990923, 'timestamp': '2025-09-10 02:19:48.645582', 'step': 1902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:48.678384', 'step': 1902, 'epoch': 1} {'type': 'loss', 'content': 0.003453353885561228, 'timestamp': '2025-09-10 02:19:48.688607', 'step': 1903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:19:48.743530', 'step': 1903, 'epoch': 1} {'type': 'loss', 'content': 0.004564212169498205, 'timestamp': '2025-09-10 02:19:48.778028', 'step': 1904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:48.810511', 'step': 1904, 'epoch': 1} {'type': 'loss', 'content': 0.018804430961608887, 'timestamp': '2025-09-10 02:19:48.815823', 'step': 1905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:19:48.847065', 'step': 1905, 'epoch': 1} {'type': 'loss', 'content': 0.012097448110580444, 'timestamp': '2025-09-10 02:19:48.851347', 'step': 1906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:19:48.885945', 'step': 1906, 'epoch': 1} {'type': 'loss', 'content': 0.003606958081945777, 'timestamp': '2025-09-10 02:19:48.898477', 'step': 1907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:48.933153', 'step': 1907, 'epoch': 1} {'type': 'loss', 'content': 0.0061880433931946754, 'timestamp': '2025-09-10 02:19:48.964240', 'step': 1908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:19:48.996907', 'step': 1908, 'epoch': 1} {'type': 'loss', 'content': 0.004639564547687769, 'timestamp': '2025-09-10 02:19:48.999361', 'step': 1909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:49.030532', 'step': 1909, 'epoch': 1} {'type': 'loss', 'content': 0.0022573911119252443, 'timestamp': '2025-09-10 02:19:49.037363', 'step': 1910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:49.068175', 'step': 1910, 'epoch': 1} {'type': 'loss', 'content': 0.006169704254716635, 'timestamp': '2025-09-10 02:19:49.078299', 'step': 1911, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:19:59.318357', 'step': 1911, 'epoch': 1} {'type': 'pplx', 'content': 15748464.88131854, 'timestamp': '2025-09-10 02:19:59.321577', 'step': 1911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:19:59.355005', 'step': 1911, 'epoch': 1} {'type': 'loss', 'content': 0.01734175719320774, 'timestamp': '2025-09-10 02:19:59.389250', 'step': 1912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:59.431938', 'step': 1912, 'epoch': 1} {'type': 'loss', 'content': 0.001922283903695643, 'timestamp': '2025-09-10 02:19:59.436388', 'step': 1913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:59.467171', 'step': 1913, 'epoch': 1} {'type': 'loss', 'content': 0.028175072744488716, 'timestamp': '2025-09-10 02:19:59.470920', 'step': 1914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:59.502668', 'step': 1914, 'epoch': 1} {'type': 'loss', 'content': 0.016676677390933037, 'timestamp': '2025-09-10 02:19:59.508746', 'step': 1915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:19:59.544225', 'step': 1915, 'epoch': 1} {'type': 'loss', 'content': 0.025236140936613083, 'timestamp': '2025-09-10 02:19:59.578750', 'step': 1916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:19:59.612425', 'step': 1916, 'epoch': 1} {'type': 'loss', 'content': 0.012673401273787022, 'timestamp': '2025-09-10 02:19:59.614714', 'step': 1917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:19:59.648493', 'step': 1917, 'epoch': 1} {'type': 'loss', 'content': 0.010366697795689106, 'timestamp': '2025-09-10 02:19:59.654277', 'step': 1918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:19:59.686309', 'step': 1918, 'epoch': 1} {'type': 'loss', 'content': 0.02431515045464039, 'timestamp': '2025-09-10 02:19:59.695584', 'step': 1919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:19:59.726935', 'step': 1919, 'epoch': 1} {'type': 'loss', 'content': 0.014513040892779827, 'timestamp': '2025-09-10 02:19:59.754412', 'step': 1920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:19:59.786752', 'step': 1920, 'epoch': 1} {'type': 'loss', 'content': 0.006609838455915451, 'timestamp': '2025-09-10 02:19:59.791424', 'step': 1921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:19:59.827048', 'step': 1921, 'epoch': 1} {'type': 'loss', 'content': 0.006101998034864664, 'timestamp': '2025-09-10 02:19:59.841049', 'step': 1922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:19:59.873147', 'step': 1922, 'epoch': 1} {'type': 'loss', 'content': 0.036406856030225754, 'timestamp': '2025-09-10 02:19:59.880180', 'step': 1923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:19:59.912547', 'step': 1923, 'epoch': 1} {'type': 'loss', 'content': 0.034558676183223724, 'timestamp': '2025-09-10 02:19:59.937079', 'step': 1924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:19:59.969338', 'step': 1924, 'epoch': 1} {'type': 'loss', 'content': 0.00515876105055213, 'timestamp': '2025-09-10 02:19:59.978570', 'step': 1925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:00.013280', 'step': 1925, 'epoch': 1} {'type': 'loss', 'content': 0.014893017709255219, 'timestamp': '2025-09-10 02:20:00.016983', 'step': 1926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:00.051875', 'step': 1926, 'epoch': 1} {'type': 'loss', 'content': 0.015396283939480782, 'timestamp': '2025-09-10 02:20:00.056401', 'step': 1927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:00.095090', 'step': 1927, 'epoch': 1} {'type': 'loss', 'content': 0.013009021990001202, 'timestamp': '2025-09-10 02:20:00.119808', 'step': 1928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:00.155475', 'step': 1928, 'epoch': 1} {'type': 'loss', 'content': 0.026162832975387573, 'timestamp': '2025-09-10 02:20:00.164772', 'step': 1929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:00.203437', 'step': 1929, 'epoch': 1} {'type': 'loss', 'content': 0.01741965487599373, 'timestamp': '2025-09-10 02:20:00.215425', 'step': 1930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:00.251965', 'step': 1930, 'epoch': 1} {'type': 'loss', 'content': 0.002141000237315893, 'timestamp': '2025-09-10 02:20:00.261627', 'step': 1931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:00.303520', 'step': 1931, 'epoch': 1} {'type': 'loss', 'content': 0.0038426872342824936, 'timestamp': '2025-09-10 02:20:00.334654', 'step': 1932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:00.374816', 'step': 1932, 'epoch': 1} {'type': 'loss', 'content': 0.010318142361938953, 'timestamp': '2025-09-10 02:20:00.382382', 'step': 1933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:00.423403', 'step': 1933, 'epoch': 1} {'type': 'loss', 'content': 0.026648273691534996, 'timestamp': '2025-09-10 02:20:00.433237', 'step': 1934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:00.476849', 'step': 1934, 'epoch': 1} {'type': 'loss', 'content': 0.04163838550448418, 'timestamp': '2025-09-10 02:20:00.483471', 'step': 1935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:00.516025', 'step': 1935, 'epoch': 1} {'type': 'loss', 'content': 0.006355960853397846, 'timestamp': '2025-09-10 02:20:00.544171', 'step': 1936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:00.577147', 'step': 1936, 'epoch': 1} {'type': 'loss', 'content': 0.002842534566298127, 'timestamp': '2025-09-10 02:20:00.586499', 'step': 1937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:00.617329', 'step': 1937, 'epoch': 1} {'type': 'loss', 'content': 0.0015678989002481103, 'timestamp': '2025-09-10 02:20:00.629113', 'step': 1938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:00.660175', 'step': 1938, 'epoch': 1} {'type': 'loss', 'content': 0.016521496698260307, 'timestamp': '2025-09-10 02:20:00.666949', 'step': 1939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:00.699137', 'step': 1939, 'epoch': 1} {'type': 'loss', 'content': 0.0046984353102743626, 'timestamp': '2025-09-10 02:20:00.727048', 'step': 1940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:00.758090', 'step': 1940, 'epoch': 1} {'type': 'loss', 'content': 0.004027045797556639, 'timestamp': '2025-09-10 02:20:00.762685', 'step': 1941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:00.793799', 'step': 1941, 'epoch': 1} {'type': 'loss', 'content': 0.02891431376338005, 'timestamp': '2025-09-10 02:20:00.800863', 'step': 1942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:00.834601', 'step': 1942, 'epoch': 1} {'type': 'loss', 'content': 0.016804974526166916, 'timestamp': '2025-09-10 02:20:00.841617', 'step': 1943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:00.873058', 'step': 1943, 'epoch': 1} {'type': 'loss', 'content': 0.005456176120787859, 'timestamp': '2025-09-10 02:20:00.904001', 'step': 1944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:00.935753', 'step': 1944, 'epoch': 1} {'type': 'loss', 'content': 0.002787849633023143, 'timestamp': '2025-09-10 02:20:00.940304', 'step': 1945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:00.972908', 'step': 1945, 'epoch': 1} {'type': 'loss', 'content': 0.021894289180636406, 'timestamp': '2025-09-10 02:20:00.983091', 'step': 1946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:01.014659', 'step': 1946, 'epoch': 1} {'type': 'loss', 'content': 0.008030838333070278, 'timestamp': '2025-09-10 02:20:01.026613', 'step': 1947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:01.058472', 'step': 1947, 'epoch': 1} {'type': 'loss', 'content': 0.029469074681401253, 'timestamp': '2025-09-10 02:20:01.086618', 'step': 1948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:01.117184', 'step': 1948, 'epoch': 1} {'type': 'loss', 'content': 0.004061527084559202, 'timestamp': '2025-09-10 02:20:01.119788', 'step': 1949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:01.152023', 'step': 1949, 'epoch': 1} {'type': 'loss', 'content': 0.010181749239563942, 'timestamp': '2025-09-10 02:20:01.158883', 'step': 1950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:01.189424', 'step': 1950, 'epoch': 1} {'type': 'loss', 'content': 0.021814599633216858, 'timestamp': '2025-09-10 02:20:01.193951', 'step': 1951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:01.228797', 'step': 1951, 'epoch': 1} {'type': 'loss', 'content': 0.009718448854982853, 'timestamp': '2025-09-10 02:20:01.257497', 'step': 1952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:01.289417', 'step': 1952, 'epoch': 1} {'type': 'loss', 'content': 0.005462608300149441, 'timestamp': '2025-09-10 02:20:01.296372', 'step': 1953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:01.327704', 'step': 1953, 'epoch': 1} {'type': 'loss', 'content': 0.002389210741966963, 'timestamp': '2025-09-10 02:20:01.334816', 'step': 1954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:01.365920', 'step': 1954, 'epoch': 1} {'type': 'loss', 'content': 0.0015777194639667869, 'timestamp': '2025-09-10 02:20:01.375825', 'step': 1955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:01.406777', 'step': 1955, 'epoch': 1} {'type': 'loss', 'content': 0.007745720446109772, 'timestamp': '2025-09-10 02:20:01.435074', 'step': 1956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:20:01.468323', 'step': 1956, 'epoch': 1} {'type': 'loss', 'content': 0.009010471403598785, 'timestamp': '2025-09-10 02:20:01.481413', 'step': 1957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:01.514875', 'step': 1957, 'epoch': 1} {'type': 'loss', 'content': 0.006604184862226248, 'timestamp': '2025-09-10 02:20:01.528177', 'step': 1958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:01.561394', 'step': 1958, 'epoch': 1} {'type': 'loss', 'content': 0.0016930067213252187, 'timestamp': '2025-09-10 02:20:01.569093', 'step': 1959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:01.601187', 'step': 1959, 'epoch': 1} {'type': 'loss', 'content': 0.016999879851937294, 'timestamp': '2025-09-10 02:20:01.633933', 'step': 1960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:01.664733', 'step': 1960, 'epoch': 1} {'type': 'loss', 'content': 0.0018128232331946492, 'timestamp': '2025-09-10 02:20:01.669447', 'step': 1961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:01.700432', 'step': 1961, 'epoch': 1} {'type': 'loss', 'content': 0.0029406710527837276, 'timestamp': '2025-09-10 02:20:01.708110', 'step': 1962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:01.740014', 'step': 1962, 'epoch': 1} {'type': 'loss', 'content': 0.011884416453540325, 'timestamp': '2025-09-10 02:20:01.746627', 'step': 1963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:01.777969', 'step': 1963, 'epoch': 1} {'type': 'loss', 'content': 0.011890656314790249, 'timestamp': '2025-09-10 02:20:01.808641', 'step': 1964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:01.842975', 'step': 1964, 'epoch': 1} {'type': 'loss', 'content': 0.012111729942262173, 'timestamp': '2025-09-10 02:20:01.847753', 'step': 1965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:01.878753', 'step': 1965, 'epoch': 1} {'type': 'loss', 'content': 0.00835806131362915, 'timestamp': '2025-09-10 02:20:01.881443', 'step': 1966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:01.912407', 'step': 1966, 'epoch': 1} {'type': 'loss', 'content': 0.01073089987039566, 'timestamp': '2025-09-10 02:20:01.924880', 'step': 1967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:01.956058', 'step': 1967, 'epoch': 1} {'type': 'loss', 'content': 0.0013615777716040611, 'timestamp': '2025-09-10 02:20:01.980552', 'step': 1968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:02.011121', 'step': 1968, 'epoch': 1} {'type': 'loss', 'content': 0.026787450537085533, 'timestamp': '2025-09-10 02:20:02.015414', 'step': 1969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:02.046192', 'step': 1969, 'epoch': 1} {'type': 'loss', 'content': 0.024200987070798874, 'timestamp': '2025-09-10 02:20:02.052835', 'step': 1970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:02.095175', 'step': 1970, 'epoch': 1} {'type': 'loss', 'content': 0.022902294993400574, 'timestamp': '2025-09-10 02:20:02.097512', 'step': 1971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:02.129584', 'step': 1971, 'epoch': 1} {'type': 'loss', 'content': 0.01670275256037712, 'timestamp': '2025-09-10 02:20:02.162362', 'step': 1972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:20:02.194796', 'step': 1972, 'epoch': 1} {'type': 'loss', 'content': 0.004363041836768389, 'timestamp': '2025-09-10 02:20:02.196975', 'step': 1973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:20:02.233718', 'step': 1973, 'epoch': 1} {'type': 'loss', 'content': 0.007900647819042206, 'timestamp': '2025-09-10 02:20:02.247731', 'step': 1974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:02.279427', 'step': 1974, 'epoch': 1} {'type': 'loss', 'content': 0.023129496723413467, 'timestamp': '2025-09-10 02:20:02.283217', 'step': 1975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:02.314827', 'step': 1975, 'epoch': 1} {'type': 'loss', 'content': 0.0061828577890992165, 'timestamp': '2025-09-10 02:20:02.343125', 'step': 1976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:20:02.380946', 'step': 1976, 'epoch': 1} {'type': 'loss', 'content': 0.011836091056466103, 'timestamp': '2025-09-10 02:20:02.396599', 'step': 1977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:02.427786', 'step': 1977, 'epoch': 1} {'type': 'loss', 'content': 0.054816387593746185, 'timestamp': '2025-09-10 02:20:02.435329', 'step': 1978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:02.467061', 'step': 1978, 'epoch': 1} {'type': 'loss', 'content': 0.0033305014949291945, 'timestamp': '2025-09-10 02:20:02.477014', 'step': 1979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:02.511551', 'step': 1979, 'epoch': 1} {'type': 'loss', 'content': 0.001081528840586543, 'timestamp': '2025-09-10 02:20:02.539830', 'step': 1980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:02.580188', 'step': 1980, 'epoch': 1} {'type': 'loss', 'content': 0.03720958158373833, 'timestamp': '2025-09-10 02:20:02.585005', 'step': 1981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:02.627441', 'step': 1981, 'epoch': 1} {'type': 'loss', 'content': 0.017248960211873055, 'timestamp': '2025-09-10 02:20:02.631086', 'step': 1982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 13289167064320}, 'timestamp': '2025-09-10 02:20:02.673598', 'step': 1982, 'epoch': 1} {'type': 'loss', 'content': 0.006337800528854132, 'timestamp': '2025-09-10 02:20:02.689914', 'step': 1983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:02.724966', 'step': 1983, 'epoch': 1} {'type': 'loss', 'content': 0.038486216217279434, 'timestamp': '2025-09-10 02:20:02.751861', 'step': 1984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:02.783687', 'step': 1984, 'epoch': 1} {'type': 'loss', 'content': 0.01467389427125454, 'timestamp': '2025-09-10 02:20:02.788693', 'step': 1985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:02.819594', 'step': 1985, 'epoch': 1} {'type': 'loss', 'content': 0.01384007465094328, 'timestamp': '2025-09-10 02:20:02.823988', 'step': 1986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:02.855334', 'step': 1986, 'epoch': 1} {'type': 'loss', 'content': 0.029948865994811058, 'timestamp': '2025-09-10 02:20:02.862195', 'step': 1987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:02.893690', 'step': 1987, 'epoch': 1} {'type': 'loss', 'content': 0.007479586638510227, 'timestamp': '2025-09-10 02:20:02.921136', 'step': 1988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:02.951996', 'step': 1988, 'epoch': 1} {'type': 'loss', 'content': 0.00857719499617815, 'timestamp': '2025-09-10 02:20:02.956381', 'step': 1989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:02.988249', 'step': 1989, 'epoch': 1} {'type': 'loss', 'content': 0.012705625034868717, 'timestamp': '2025-09-10 02:20:02.998080', 'step': 1990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:03.030003', 'step': 1990, 'epoch': 1} {'type': 'loss', 'content': 0.015617369674146175, 'timestamp': '2025-09-10 02:20:03.036771', 'step': 1991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:03.068372', 'step': 1991, 'epoch': 1} {'type': 'loss', 'content': 0.014881722629070282, 'timestamp': '2025-09-10 02:20:03.101574', 'step': 1992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:03.132743', 'step': 1992, 'epoch': 1} {'type': 'loss', 'content': 0.012684579007327557, 'timestamp': '2025-09-10 02:20:03.135471', 'step': 1993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:03.167036', 'step': 1993, 'epoch': 1} {'type': 'loss', 'content': 0.00392954470589757, 'timestamp': '2025-09-10 02:20:03.173954', 'step': 1994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:03.206365', 'step': 1994, 'epoch': 1} {'type': 'loss', 'content': 0.003335257526487112, 'timestamp': '2025-09-10 02:20:03.213851', 'step': 1995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:03.244516', 'step': 1995, 'epoch': 1} {'type': 'loss', 'content': 0.014795198105275631, 'timestamp': '2025-09-10 02:20:03.269528', 'step': 1996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:03.300340', 'step': 1996, 'epoch': 1} {'type': 'loss', 'content': 0.003911779262125492, 'timestamp': '2025-09-10 02:20:03.305613', 'step': 1997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:03.335757', 'step': 1997, 'epoch': 1} {'type': 'loss', 'content': 0.0010702766012400389, 'timestamp': '2025-09-10 02:20:03.346036', 'step': 1998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:03.378598', 'step': 1998, 'epoch': 1} {'type': 'loss', 'content': 0.006708834785968065, 'timestamp': '2025-09-10 02:20:03.383112', 'step': 1999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:03.413851', 'step': 1999, 'epoch': 1} {'type': 'loss', 'content': 0.018123431131243706, 'timestamp': '2025-09-10 02:20:03.446982', 'step': 2000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 2000', 'timestamp': '2025-09-10 02:20:08.143067', 'step': 2000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:08.176414', 'step': 2000, 'epoch': 1} {'type': 'loss', 'content': 0.013308617286384106, 'timestamp': '2025-09-10 02:20:08.184018', 'step': 2001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:08.216585', 'step': 2001, 'epoch': 1} {'type': 'loss', 'content': 0.009740256704390049, 'timestamp': '2025-09-10 02:20:08.220053', 'step': 2002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:08.255010', 'step': 2002, 'epoch': 1} {'type': 'loss', 'content': 0.002783420728519559, 'timestamp': '2025-09-10 02:20:08.259076', 'step': 2003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:08.290467', 'step': 2003, 'epoch': 1} {'type': 'loss', 'content': 0.016279999166727066, 'timestamp': '2025-09-10 02:20:08.317646', 'step': 2004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:08.350363', 'step': 2004, 'epoch': 1} {'type': 'loss', 'content': 0.002739792922511697, 'timestamp': '2025-09-10 02:20:08.360066', 'step': 2005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:08.391551', 'step': 2005, 'epoch': 1} {'type': 'loss', 'content': 0.0065587302669882774, 'timestamp': '2025-09-10 02:20:08.398108', 'step': 2006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:08.429557', 'step': 2006, 'epoch': 1} {'type': 'loss', 'content': 0.03201238065958023, 'timestamp': '2025-09-10 02:20:08.433423', 'step': 2007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:08.465425', 'step': 2007, 'epoch': 1} {'type': 'loss', 'content': 0.0022500918712466955, 'timestamp': '2025-09-10 02:20:08.496454', 'step': 2008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:08.528796', 'step': 2008, 'epoch': 1} {'type': 'loss', 'content': 0.04503735154867172, 'timestamp': '2025-09-10 02:20:08.535233', 'step': 2009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:08.566556', 'step': 2009, 'epoch': 1} {'type': 'loss', 'content': 0.0013476565945893526, 'timestamp': '2025-09-10 02:20:08.574229', 'step': 2010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:20:08.606641', 'step': 2010, 'epoch': 1} {'type': 'loss', 'content': 0.0042086499743163586, 'timestamp': '2025-09-10 02:20:08.608893', 'step': 2011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:08.640311', 'step': 2011, 'epoch': 1} {'type': 'loss', 'content': 0.006380919367074966, 'timestamp': '2025-09-10 02:20:08.667889', 'step': 2012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:08.699255', 'step': 2012, 'epoch': 1} {'type': 'loss', 'content': 0.02008945681154728, 'timestamp': '2025-09-10 02:20:08.703452', 'step': 2013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:08.734085', 'step': 2013, 'epoch': 1} {'type': 'loss', 'content': 0.0637151375412941, 'timestamp': '2025-09-10 02:20:08.736305', 'step': 2014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:08.767522', 'step': 2014, 'epoch': 1} {'type': 'loss', 'content': 0.0012976779835298657, 'timestamp': '2025-09-10 02:20:08.771683', 'step': 2015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:08.802724', 'step': 2015, 'epoch': 1} {'type': 'loss', 'content': 0.013720971532166004, 'timestamp': '2025-09-10 02:20:08.831025', 'step': 2016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:08.862613', 'step': 2016, 'epoch': 1} {'type': 'loss', 'content': 0.017362453043460846, 'timestamp': '2025-09-10 02:20:08.865166', 'step': 2017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:08.896836', 'step': 2017, 'epoch': 1} {'type': 'loss', 'content': 0.005491399206221104, 'timestamp': '2025-09-10 02:20:08.904248', 'step': 2018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:08.935110', 'step': 2018, 'epoch': 1} {'type': 'loss', 'content': 0.01876024715602398, 'timestamp': '2025-09-10 02:20:08.939187', 'step': 2019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:08.972204', 'step': 2019, 'epoch': 1} {'type': 'loss', 'content': 0.009473063051700592, 'timestamp': '2025-09-10 02:20:08.996335', 'step': 2020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:09.029022', 'step': 2020, 'epoch': 1} {'type': 'loss', 'content': 0.01576576568186283, 'timestamp': '2025-09-10 02:20:09.036361', 'step': 2021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:20:09.071077', 'step': 2021, 'epoch': 1} {'type': 'loss', 'content': 0.030170930549502373, 'timestamp': '2025-09-10 02:20:09.084825', 'step': 2022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:20:09.120147', 'step': 2022, 'epoch': 1} {'type': 'loss', 'content': 0.02932755835354328, 'timestamp': '2025-09-10 02:20:09.133836', 'step': 2023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:09.165574', 'step': 2023, 'epoch': 1} {'type': 'loss', 'content': 0.0012282658135518432, 'timestamp': '2025-09-10 02:20:09.193653', 'step': 2024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:20:09.226223', 'step': 2024, 'epoch': 1} {'type': 'loss', 'content': 0.006276692263782024, 'timestamp': '2025-09-10 02:20:09.239234', 'step': 2025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:09.270967', 'step': 2025, 'epoch': 1} {'type': 'loss', 'content': 0.011697587557137012, 'timestamp': '2025-09-10 02:20:09.278708', 'step': 2026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:09.311121', 'step': 2026, 'epoch': 1} {'type': 'loss', 'content': 0.007251562085002661, 'timestamp': '2025-09-10 02:20:09.318243', 'step': 2027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:09.349398', 'step': 2027, 'epoch': 1} {'type': 'loss', 'content': 0.00808743666857481, 'timestamp': '2025-09-10 02:20:09.377091', 'step': 2028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:09.408086', 'step': 2028, 'epoch': 1} {'type': 'loss', 'content': 0.0025386668276041746, 'timestamp': '2025-09-10 02:20:09.410319', 'step': 2029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:09.441351', 'step': 2029, 'epoch': 1} {'type': 'loss', 'content': 0.059052709490060806, 'timestamp': '2025-09-10 02:20:09.443589', 'step': 2030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:09.476520', 'step': 2030, 'epoch': 1} {'type': 'loss', 'content': 0.01944047398865223, 'timestamp': '2025-09-10 02:20:09.484150', 'step': 2031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:09.518094', 'step': 2031, 'epoch': 1} {'type': 'loss', 'content': 0.01776472106575966, 'timestamp': '2025-09-10 02:20:09.552272', 'step': 2032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:09.586355', 'step': 2032, 'epoch': 1} {'type': 'loss', 'content': 0.0041867028921842575, 'timestamp': '2025-09-10 02:20:09.591297', 'step': 2033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:09.624274', 'step': 2033, 'epoch': 1} {'type': 'loss', 'content': 0.016866056248545647, 'timestamp': '2025-09-10 02:20:09.636312', 'step': 2034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:09.667931', 'step': 2034, 'epoch': 1} {'type': 'loss', 'content': 0.009813317097723484, 'timestamp': '2025-09-10 02:20:09.674821', 'step': 2035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 15662185694400}, 'timestamp': '2025-09-10 02:20:09.720805', 'step': 2035, 'epoch': 1} {'type': 'loss', 'content': 0.009570365771651268, 'timestamp': '2025-09-10 02:20:09.760848', 'step': 2036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:09.793278', 'step': 2036, 'epoch': 1} {'type': 'loss', 'content': 0.011103704571723938, 'timestamp': '2025-09-10 02:20:09.797984', 'step': 2037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:09.832433', 'step': 2037, 'epoch': 1} {'type': 'loss', 'content': 0.016965234652161598, 'timestamp': '2025-09-10 02:20:09.845793', 'step': 2038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:09.877310', 'step': 2038, 'epoch': 1} {'type': 'loss', 'content': 0.012328843586146832, 'timestamp': '2025-09-10 02:20:09.883972', 'step': 2039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:09.915267', 'step': 2039, 'epoch': 1} {'type': 'loss', 'content': 0.01965012215077877, 'timestamp': '2025-09-10 02:20:09.943494', 'step': 2040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:09.975189', 'step': 2040, 'epoch': 1} {'type': 'loss', 'content': 0.015359156765043736, 'timestamp': '2025-09-10 02:20:09.979739', 'step': 2041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:20:10.015411', 'step': 2041, 'epoch': 1} {'type': 'loss', 'content': 0.025193924084305763, 'timestamp': '2025-09-10 02:20:10.029190', 'step': 2042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:10.063910', 'step': 2042, 'epoch': 1} {'type': 'loss', 'content': 0.006153655704110861, 'timestamp': '2025-09-10 02:20:10.070617', 'step': 2043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:20:10.109571', 'step': 2043, 'epoch': 1} {'type': 'loss', 'content': 0.003595164744183421, 'timestamp': '2025-09-10 02:20:10.146348', 'step': 2044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:10.179076', 'step': 2044, 'epoch': 1} {'type': 'loss', 'content': 0.002315750578418374, 'timestamp': '2025-09-10 02:20:10.191421', 'step': 2045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:10.222927', 'step': 2045, 'epoch': 1} {'type': 'loss', 'content': 0.01863052323460579, 'timestamp': '2025-09-10 02:20:10.227211', 'step': 2046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:10.266149', 'step': 2046, 'epoch': 1} {'type': 'loss', 'content': 0.02388039045035839, 'timestamp': '2025-09-10 02:20:10.270375', 'step': 2047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:10.302182', 'step': 2047, 'epoch': 1} {'type': 'loss', 'content': 0.014288315549492836, 'timestamp': '2025-09-10 02:20:10.332651', 'step': 2048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:10.364502', 'step': 2048, 'epoch': 1} {'type': 'loss', 'content': 0.02336038462817669, 'timestamp': '2025-09-10 02:20:10.366451', 'step': 2049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:10.399108', 'step': 2049, 'epoch': 1} {'type': 'loss', 'content': 0.03333750367164612, 'timestamp': '2025-09-10 02:20:10.406210', 'step': 2050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:10.439798', 'step': 2050, 'epoch': 1} {'type': 'loss', 'content': 0.03487107530236244, 'timestamp': '2025-09-10 02:20:10.453116', 'step': 2051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:10.488181', 'step': 2051, 'epoch': 1} {'type': 'loss', 'content': 0.008551175706088543, 'timestamp': '2025-09-10 02:20:10.516117', 'step': 2052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:10.560118', 'step': 2052, 'epoch': 1} {'type': 'loss', 'content': 0.000608120986726135, 'timestamp': '2025-09-10 02:20:10.565386', 'step': 2053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:10.603975', 'step': 2053, 'epoch': 1} {'type': 'loss', 'content': 0.0034163747914135456, 'timestamp': '2025-09-10 02:20:10.614377', 'step': 2054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:10.651410', 'step': 2054, 'epoch': 1} {'type': 'loss', 'content': 0.008907620795071125, 'timestamp': '2025-09-10 02:20:10.656895', 'step': 2055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:10.692828', 'step': 2055, 'epoch': 1} {'type': 'loss', 'content': 0.022446779534220695, 'timestamp': '2025-09-10 02:20:10.720333', 'step': 2056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:10.756370', 'step': 2056, 'epoch': 1} {'type': 'loss', 'content': 0.021804099902510643, 'timestamp': '2025-09-10 02:20:10.765348', 'step': 2057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:10.796437', 'step': 2057, 'epoch': 1} {'type': 'loss', 'content': 0.004856002051383257, 'timestamp': '2025-09-10 02:20:10.806145', 'step': 2058, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:20:21.241490', 'step': 2058, 'epoch': 1} {'type': 'pplx', 'content': 17864993.387790058, 'timestamp': '2025-09-10 02:20:21.246430', 'step': 2058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:21.279535', 'step': 2058, 'epoch': 1} {'type': 'loss', 'content': 0.02261553891003132, 'timestamp': '2025-09-10 02:20:21.287744', 'step': 2059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:21.323575', 'step': 2059, 'epoch': 1} {'type': 'loss', 'content': 0.007157010026276112, 'timestamp': '2025-09-10 02:20:21.357682', 'step': 2060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:21.389763', 'step': 2060, 'epoch': 1} {'type': 'loss', 'content': 0.026648053899407387, 'timestamp': '2025-09-10 02:20:21.398755', 'step': 2061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:21.430854', 'step': 2061, 'epoch': 1} {'type': 'loss', 'content': 0.006498201750218868, 'timestamp': '2025-09-10 02:20:21.437552', 'step': 2062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:21.469463', 'step': 2062, 'epoch': 1} {'type': 'loss', 'content': 0.024175258353352547, 'timestamp': '2025-09-10 02:20:21.476964', 'step': 2063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:21.509159', 'step': 2063, 'epoch': 1} {'type': 'loss', 'content': 0.007673321757465601, 'timestamp': '2025-09-10 02:20:21.537240', 'step': 2064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:21.569051', 'step': 2064, 'epoch': 1} {'type': 'loss', 'content': 0.005579050164669752, 'timestamp': '2025-09-10 02:20:21.571289', 'step': 2065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:21.603339', 'step': 2065, 'epoch': 1} {'type': 'loss', 'content': 0.010995094664394855, 'timestamp': '2025-09-10 02:20:21.610701', 'step': 2066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:21.643363', 'step': 2066, 'epoch': 1} {'type': 'loss', 'content': 0.03568677604198456, 'timestamp': '2025-09-10 02:20:21.655675', 'step': 2067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:21.687456', 'step': 2067, 'epoch': 1} {'type': 'loss', 'content': 0.00576269906014204, 'timestamp': '2025-09-10 02:20:21.720291', 'step': 2068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:21.753413', 'step': 2068, 'epoch': 1} {'type': 'loss', 'content': 0.014195759780704975, 'timestamp': '2025-09-10 02:20:21.758422', 'step': 2069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:21.792123', 'step': 2069, 'epoch': 1} {'type': 'loss', 'content': 0.0028361105360090733, 'timestamp': '2025-09-10 02:20:21.802984', 'step': 2070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:21.837230', 'step': 2070, 'epoch': 1} {'type': 'loss', 'content': 0.003535019000992179, 'timestamp': '2025-09-10 02:20:21.844518', 'step': 2071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:21.875162', 'step': 2071, 'epoch': 1} {'type': 'loss', 'content': 0.006695587653666735, 'timestamp': '2025-09-10 02:20:21.903471', 'step': 2072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:21.933981', 'step': 2072, 'epoch': 1} {'type': 'loss', 'content': 0.017478538677096367, 'timestamp': '2025-09-10 02:20:21.938764', 'step': 2073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:21.969929', 'step': 2073, 'epoch': 1} {'type': 'loss', 'content': 0.005236539524048567, 'timestamp': '2025-09-10 02:20:21.982093', 'step': 2074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:20:22.016908', 'step': 2074, 'epoch': 1} {'type': 'loss', 'content': 0.020143600180745125, 'timestamp': '2025-09-10 02:20:22.030586', 'step': 2075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:20:22.061615', 'step': 2075, 'epoch': 1} {'type': 'loss', 'content': 0.017758900299668312, 'timestamp': '2025-09-10 02:20:22.085241', 'step': 2076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:20:22.115780', 'step': 2076, 'epoch': 1} {'type': 'loss', 'content': 0.014581401832401752, 'timestamp': '2025-09-10 02:20:22.118185', 'step': 2077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:22.150212', 'step': 2077, 'epoch': 1} {'type': 'loss', 'content': 0.0038522446993738413, 'timestamp': '2025-09-10 02:20:22.157895', 'step': 2078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:22.191209', 'step': 2078, 'epoch': 1} {'type': 'loss', 'content': 0.006819657050073147, 'timestamp': '2025-09-10 02:20:22.198954', 'step': 2079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 15187581968384}, 'timestamp': '2025-09-10 02:20:22.241658', 'step': 2079, 'epoch': 1} {'type': 'loss', 'content': 0.0016683044377714396, 'timestamp': '2025-09-10 02:20:22.280290', 'step': 2080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:22.311231', 'step': 2080, 'epoch': 1} {'type': 'loss', 'content': 0.006357488688081503, 'timestamp': '2025-09-10 02:20:22.316660', 'step': 2081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:22.347407', 'step': 2081, 'epoch': 1} {'type': 'loss', 'content': 0.026342766359448433, 'timestamp': '2025-09-10 02:20:22.354544', 'step': 2082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:22.385455', 'step': 2082, 'epoch': 1} {'type': 'loss', 'content': 0.021110793575644493, 'timestamp': '2025-09-10 02:20:22.397683', 'step': 2083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:22.431408', 'step': 2083, 'epoch': 1} {'type': 'loss', 'content': 0.007887489162385464, 'timestamp': '2025-09-10 02:20:22.456451', 'step': 2084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:20:22.493285', 'step': 2084, 'epoch': 1} {'type': 'loss', 'content': 0.01206361036747694, 'timestamp': '2025-09-10 02:20:22.508412', 'step': 2085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:22.539607', 'step': 2085, 'epoch': 1} {'type': 'loss', 'content': 0.0012552806874737144, 'timestamp': '2025-09-10 02:20:22.546422', 'step': 2086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:22.578638', 'step': 2086, 'epoch': 1} {'type': 'loss', 'content': 0.008134759962558746, 'timestamp': '2025-09-10 02:20:22.585542', 'step': 2087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:20:22.620360', 'step': 2087, 'epoch': 1} {'type': 'loss', 'content': 0.0027143345214426517, 'timestamp': '2025-09-10 02:20:22.655226', 'step': 2088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:22.688088', 'step': 2088, 'epoch': 1} {'type': 'loss', 'content': 0.0388091541826725, 'timestamp': '2025-09-10 02:20:22.690414', 'step': 2089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:22.723556', 'step': 2089, 'epoch': 1} {'type': 'loss', 'content': 0.0491640567779541, 'timestamp': '2025-09-10 02:20:22.734527', 'step': 2090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:20:22.772601', 'step': 2090, 'epoch': 1} {'type': 'loss', 'content': 0.006399436388164759, 'timestamp': '2025-09-10 02:20:22.788192', 'step': 2091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:22.821167', 'step': 2091, 'epoch': 1} {'type': 'loss', 'content': 0.011510169133543968, 'timestamp': '2025-09-10 02:20:22.845097', 'step': 2092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:20:22.881376', 'step': 2092, 'epoch': 1} {'type': 'loss', 'content': 0.07845025509595871, 'timestamp': '2025-09-10 02:20:22.896561', 'step': 2093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:22.929539', 'step': 2093, 'epoch': 1} {'type': 'loss', 'content': 0.026220111176371574, 'timestamp': '2025-09-10 02:20:22.936638', 'step': 2094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:22.968347', 'step': 2094, 'epoch': 1} {'type': 'loss', 'content': 0.02482834830880165, 'timestamp': '2025-09-10 02:20:22.978699', 'step': 2095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:23.010557', 'step': 2095, 'epoch': 1} {'type': 'loss', 'content': 0.017762552946805954, 'timestamp': '2025-09-10 02:20:23.038344', 'step': 2096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:23.071589', 'step': 2096, 'epoch': 1} {'type': 'loss', 'content': 0.0069184741005301476, 'timestamp': '2025-09-10 02:20:23.078394', 'step': 2097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:23.110349', 'step': 2097, 'epoch': 1} {'type': 'loss', 'content': 0.008609072305262089, 'timestamp': '2025-09-10 02:20:23.116652', 'step': 2098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:23.147965', 'step': 2098, 'epoch': 1} {'type': 'loss', 'content': 0.001422119210474193, 'timestamp': '2025-09-10 02:20:23.159655', 'step': 2099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:23.193309', 'step': 2099, 'epoch': 1} {'type': 'loss', 'content': 0.006961984094232321, 'timestamp': '2025-09-10 02:20:23.224650', 'step': 2100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:23.255497', 'step': 2100, 'epoch': 1} {'type': 'loss', 'content': 0.0065904962830245495, 'timestamp': '2025-09-10 02:20:23.257675', 'step': 2101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:23.288200', 'step': 2101, 'epoch': 1} {'type': 'loss', 'content': 0.004559192340821028, 'timestamp': '2025-09-10 02:20:23.292701', 'step': 2102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:23.324242', 'step': 2102, 'epoch': 1} {'type': 'loss', 'content': 0.01074813213199377, 'timestamp': '2025-09-10 02:20:23.331808', 'step': 2103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:23.363551', 'step': 2103, 'epoch': 1} {'type': 'loss', 'content': 0.017620306462049484, 'timestamp': '2025-09-10 02:20:23.395347', 'step': 2104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:23.427142', 'step': 2104, 'epoch': 1} {'type': 'loss', 'content': 0.008062370121479034, 'timestamp': '2025-09-10 02:20:23.439784', 'step': 2105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:23.471788', 'step': 2105, 'epoch': 1} {'type': 'loss', 'content': 0.029557526111602783, 'timestamp': '2025-09-10 02:20:23.476036', 'step': 2106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:23.510903', 'step': 2106, 'epoch': 1} {'type': 'loss', 'content': 0.014713338576257229, 'timestamp': '2025-09-10 02:20:23.524282', 'step': 2107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:23.555121', 'step': 2107, 'epoch': 1} {'type': 'loss', 'content': 0.011615300551056862, 'timestamp': '2025-09-10 02:20:23.580582', 'step': 2108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:23.611130', 'step': 2108, 'epoch': 1} {'type': 'loss', 'content': 0.0011783967493101954, 'timestamp': '2025-09-10 02:20:23.619196', 'step': 2109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:23.651276', 'step': 2109, 'epoch': 1} {'type': 'loss', 'content': 0.006283028516918421, 'timestamp': '2025-09-10 02:20:23.662054', 'step': 2110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:23.695199', 'step': 2110, 'epoch': 1} {'type': 'loss', 'content': 0.015238684602081776, 'timestamp': '2025-09-10 02:20:23.708698', 'step': 2111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:23.740097', 'step': 2111, 'epoch': 1} {'type': 'loss', 'content': 0.0074634556658566, 'timestamp': '2025-09-10 02:20:23.770592', 'step': 2112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:23.805506', 'step': 2112, 'epoch': 1} {'type': 'loss', 'content': 0.011353401467204094, 'timestamp': '2025-09-10 02:20:23.809323', 'step': 2113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:23.843406', 'step': 2113, 'epoch': 1} {'type': 'loss', 'content': 0.021240845322608948, 'timestamp': '2025-09-10 02:20:23.848788', 'step': 2114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:23.883014', 'step': 2114, 'epoch': 1} {'type': 'loss', 'content': 0.018865080550312996, 'timestamp': '2025-09-10 02:20:23.888498', 'step': 2115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:23.922213', 'step': 2115, 'epoch': 1} {'type': 'loss', 'content': 0.019352329894900322, 'timestamp': '2025-09-10 02:20:23.949465', 'step': 2116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:23.983621', 'step': 2116, 'epoch': 1} {'type': 'loss', 'content': 0.01287752389907837, 'timestamp': '2025-09-10 02:20:23.986675', 'step': 2117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:24.020402', 'step': 2117, 'epoch': 1} {'type': 'loss', 'content': 0.03353200852870941, 'timestamp': '2025-09-10 02:20:24.023711', 'step': 2118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:20:24.057404', 'step': 2118, 'epoch': 1} {'type': 'loss', 'content': 0.0233648419380188, 'timestamp': '2025-09-10 02:20:24.059721', 'step': 2119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:24.092936', 'step': 2119, 'epoch': 1} {'type': 'loss', 'content': 0.03482295200228691, 'timestamp': '2025-09-10 02:20:24.125226', 'step': 2120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:24.156601', 'step': 2120, 'epoch': 1} {'type': 'loss', 'content': 0.02343512885272503, 'timestamp': '2025-09-10 02:20:24.158914', 'step': 2121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:24.190199', 'step': 2121, 'epoch': 1} {'type': 'loss', 'content': 0.022608637809753418, 'timestamp': '2025-09-10 02:20:24.197547', 'step': 2122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:24.228337', 'step': 2122, 'epoch': 1} {'type': 'loss', 'content': 0.0130180474370718, 'timestamp': '2025-09-10 02:20:24.238347', 'step': 2123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:24.269288', 'step': 2123, 'epoch': 1} {'type': 'loss', 'content': 0.014483482576906681, 'timestamp': '2025-09-10 02:20:24.296619', 'step': 2124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:24.328265', 'step': 2124, 'epoch': 1} {'type': 'loss', 'content': 0.05478040128946304, 'timestamp': '2025-09-10 02:20:24.336772', 'step': 2125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:24.367265', 'step': 2125, 'epoch': 1} {'type': 'loss', 'content': 0.00959568191319704, 'timestamp': '2025-09-10 02:20:24.374201', 'step': 2126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:24.406908', 'step': 2126, 'epoch': 1} {'type': 'loss', 'content': 0.012538508512079716, 'timestamp': '2025-09-10 02:20:24.411278', 'step': 2127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:24.442470', 'step': 2127, 'epoch': 1} {'type': 'loss', 'content': 0.0026257862336933613, 'timestamp': '2025-09-10 02:20:24.473702', 'step': 2128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:24.503894', 'step': 2128, 'epoch': 1} {'type': 'loss', 'content': 0.018345599994063377, 'timestamp': '2025-09-10 02:20:24.506139', 'step': 2129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:24.537211', 'step': 2129, 'epoch': 1} {'type': 'loss', 'content': 0.014367531053721905, 'timestamp': '2025-09-10 02:20:24.544118', 'step': 2130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:24.583299', 'step': 2130, 'epoch': 1} {'type': 'loss', 'content': 0.0052671851590275764, 'timestamp': '2025-09-10 02:20:24.587542', 'step': 2131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:24.628092', 'step': 2131, 'epoch': 1} {'type': 'loss', 'content': 0.017293930053710938, 'timestamp': '2025-09-10 02:20:24.659286', 'step': 2132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:24.691742', 'step': 2132, 'epoch': 1} {'type': 'loss', 'content': 0.0051933168433606625, 'timestamp': '2025-09-10 02:20:24.702182', 'step': 2133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:24.733405', 'step': 2133, 'epoch': 1} {'type': 'loss', 'content': 0.008713253773748875, 'timestamp': '2025-09-10 02:20:24.740295', 'step': 2134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:24.771556', 'step': 2134, 'epoch': 1} {'type': 'loss', 'content': 0.010111020877957344, 'timestamp': '2025-09-10 02:20:24.783335', 'step': 2135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:20:24.821841', 'step': 2135, 'epoch': 1} {'type': 'loss', 'content': 0.014230245724320412, 'timestamp': '2025-09-10 02:20:24.858442', 'step': 2136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:20:24.892148', 'step': 2136, 'epoch': 1} {'type': 'loss', 'content': 0.013335539028048515, 'timestamp': '2025-09-10 02:20:24.905294', 'step': 2137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:24.936397', 'step': 2137, 'epoch': 1} {'type': 'loss', 'content': 0.018712077289819717, 'timestamp': '2025-09-10 02:20:24.943181', 'step': 2138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:24.976717', 'step': 2138, 'epoch': 1} {'type': 'loss', 'content': 0.028135867789387703, 'timestamp': '2025-09-10 02:20:24.990073', 'step': 2139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:25.021402', 'step': 2139, 'epoch': 1} {'type': 'loss', 'content': 0.014833973720669746, 'timestamp': '2025-09-10 02:20:25.049743', 'step': 2140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:25.079948', 'step': 2140, 'epoch': 1} {'type': 'loss', 'content': 0.005817878060042858, 'timestamp': '2025-09-10 02:20:25.082067', 'step': 2141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:25.112835', 'step': 2141, 'epoch': 1} {'type': 'loss', 'content': 0.00543214799836278, 'timestamp': '2025-09-10 02:20:25.123106', 'step': 2142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:25.153958', 'step': 2142, 'epoch': 1} {'type': 'loss', 'content': 0.0051437122747302055, 'timestamp': '2025-09-10 02:20:25.164268', 'step': 2143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:25.198938', 'step': 2143, 'epoch': 1} {'type': 'loss', 'content': 0.004253858234733343, 'timestamp': '2025-09-10 02:20:25.233271', 'step': 2144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:25.264621', 'step': 2144, 'epoch': 1} {'type': 'loss', 'content': 0.015032351948320866, 'timestamp': '2025-09-10 02:20:25.269699', 'step': 2145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:25.300931', 'step': 2145, 'epoch': 1} {'type': 'loss', 'content': 0.010134616866707802, 'timestamp': '2025-09-10 02:20:25.313334', 'step': 2146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:25.344866', 'step': 2146, 'epoch': 1} {'type': 'loss', 'content': 0.003961100243031979, 'timestamp': '2025-09-10 02:20:25.352268', 'step': 2147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:25.383479', 'step': 2147, 'epoch': 1} {'type': 'loss', 'content': 0.014108018949627876, 'timestamp': '2025-09-10 02:20:25.411763', 'step': 2148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:25.442846', 'step': 2148, 'epoch': 1} {'type': 'loss', 'content': 0.0111985569819808, 'timestamp': '2025-09-10 02:20:25.447948', 'step': 2149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:20:25.477822', 'step': 2149, 'epoch': 1} {'type': 'loss', 'content': 0.02083497866988182, 'timestamp': '2025-09-10 02:20:25.480008', 'step': 2150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:20:25.514467', 'step': 2150, 'epoch': 1} {'type': 'loss', 'content': 0.005785502027720213, 'timestamp': '2025-09-10 02:20:25.528509', 'step': 2151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:25.559732', 'step': 2151, 'epoch': 1} {'type': 'loss', 'content': 0.008959316648542881, 'timestamp': '2025-09-10 02:20:25.587452', 'step': 2152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:25.618341', 'step': 2152, 'epoch': 1} {'type': 'loss', 'content': 0.008241984061896801, 'timestamp': '2025-09-10 02:20:25.628840', 'step': 2153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:25.659730', 'step': 2153, 'epoch': 1} {'type': 'loss', 'content': 0.006875708233565092, 'timestamp': '2025-09-10 02:20:25.667335', 'step': 2154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:25.701679', 'step': 2154, 'epoch': 1} {'type': 'loss', 'content': 0.0037335853558033705, 'timestamp': '2025-09-10 02:20:25.709348', 'step': 2155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:25.740435', 'step': 2155, 'epoch': 1} {'type': 'loss', 'content': 0.020355254411697388, 'timestamp': '2025-09-10 02:20:25.768161', 'step': 2156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:20:25.801811', 'step': 2156, 'epoch': 1} {'type': 'loss', 'content': 0.009400454349815845, 'timestamp': '2025-09-10 02:20:25.814913', 'step': 2157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:25.846853', 'step': 2157, 'epoch': 1} {'type': 'loss', 'content': 0.006258614361286163, 'timestamp': '2025-09-10 02:20:25.853851', 'step': 2158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:25.884388', 'step': 2158, 'epoch': 1} {'type': 'loss', 'content': 0.008656726218760014, 'timestamp': '2025-09-10 02:20:25.891425', 'step': 2159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:25.921831', 'step': 2159, 'epoch': 1} {'type': 'loss', 'content': 0.004763355012983084, 'timestamp': '2025-09-10 02:20:25.949770', 'step': 2160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:25.981370', 'step': 2160, 'epoch': 1} {'type': 'loss', 'content': 0.008526794612407684, 'timestamp': '2025-09-10 02:20:25.991080', 'step': 2161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:26.022544', 'step': 2161, 'epoch': 1} {'type': 'loss', 'content': 0.055472858250141144, 'timestamp': '2025-09-10 02:20:26.029601', 'step': 2162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:26.061022', 'step': 2162, 'epoch': 1} {'type': 'loss', 'content': 0.0049760532565414906, 'timestamp': '2025-09-10 02:20:26.065071', 'step': 2163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:20:26.100965', 'step': 2163, 'epoch': 1} {'type': 'loss', 'content': 0.02341938205063343, 'timestamp': '2025-09-10 02:20:26.135653', 'step': 2164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:26.167456', 'step': 2164, 'epoch': 1} {'type': 'loss', 'content': 0.01208476722240448, 'timestamp': '2025-09-10 02:20:26.172621', 'step': 2165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:26.204912', 'step': 2165, 'epoch': 1} {'type': 'loss', 'content': 0.005906891077756882, 'timestamp': '2025-09-10 02:20:26.215000', 'step': 2166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:26.246522', 'step': 2166, 'epoch': 1} {'type': 'loss', 'content': 0.001993507379665971, 'timestamp': '2025-09-10 02:20:26.256675', 'step': 2167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:26.288004', 'step': 2167, 'epoch': 1} {'type': 'loss', 'content': 0.0028882776387035847, 'timestamp': '2025-09-10 02:20:26.319213', 'step': 2168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:26.350083', 'step': 2168, 'epoch': 1} {'type': 'loss', 'content': 0.0018221037462353706, 'timestamp': '2025-09-10 02:20:26.352302', 'step': 2169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:26.383804', 'step': 2169, 'epoch': 1} {'type': 'loss', 'content': 0.012003413401544094, 'timestamp': '2025-09-10 02:20:26.390523', 'step': 2170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:26.421987', 'step': 2170, 'epoch': 1} {'type': 'loss', 'content': 0.005830215755850077, 'timestamp': '2025-09-10 02:20:26.432042', 'step': 2171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:26.463153', 'step': 2171, 'epoch': 1} {'type': 'loss', 'content': 0.025055332109332085, 'timestamp': '2025-09-10 02:20:26.490728', 'step': 2172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:26.522171', 'step': 2172, 'epoch': 1} {'type': 'loss', 'content': 0.020759152248501778, 'timestamp': '2025-09-10 02:20:26.527153', 'step': 2173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:26.558543', 'step': 2173, 'epoch': 1} {'type': 'loss', 'content': 0.0036895188968628645, 'timestamp': '2025-09-10 02:20:26.565459', 'step': 2174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:26.599427', 'step': 2174, 'epoch': 1} {'type': 'loss', 'content': 0.021146830171346664, 'timestamp': '2025-09-10 02:20:26.610450', 'step': 2175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:26.642089', 'step': 2175, 'epoch': 1} {'type': 'loss', 'content': 0.03294152021408081, 'timestamp': '2025-09-10 02:20:26.675459', 'step': 2176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:26.707514', 'step': 2176, 'epoch': 1} {'type': 'loss', 'content': 0.028937892988324165, 'timestamp': '2025-09-10 02:20:26.709724', 'step': 2177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:26.741051', 'step': 2177, 'epoch': 1} {'type': 'loss', 'content': 0.005948805715888739, 'timestamp': '2025-09-10 02:20:26.743695', 'step': 2178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:26.775324', 'step': 2178, 'epoch': 1} {'type': 'loss', 'content': 0.014749663881957531, 'timestamp': '2025-09-10 02:20:26.778142', 'step': 2179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:26.814127', 'step': 2179, 'epoch': 1} {'type': 'loss', 'content': 0.022330453619360924, 'timestamp': '2025-09-10 02:20:26.848408', 'step': 2180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:26.880605', 'step': 2180, 'epoch': 1} {'type': 'loss', 'content': 0.009073344990611076, 'timestamp': '2025-09-10 02:20:26.885273', 'step': 2181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:26.917146', 'step': 2181, 'epoch': 1} {'type': 'loss', 'content': 0.00725422753021121, 'timestamp': '2025-09-10 02:20:26.924616', 'step': 2182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:26.955341', 'step': 2182, 'epoch': 1} {'type': 'loss', 'content': 0.03919028118252754, 'timestamp': '2025-09-10 02:20:26.959402', 'step': 2183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:26.990765', 'step': 2183, 'epoch': 1} {'type': 'loss', 'content': 0.03313456103205681, 'timestamp': '2025-09-10 02:20:27.021647', 'step': 2184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:27.052764', 'step': 2184, 'epoch': 1} {'type': 'loss', 'content': 0.023840798065066338, 'timestamp': '2025-09-10 02:20:27.055205', 'step': 2185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:27.087124', 'step': 2185, 'epoch': 1} {'type': 'loss', 'content': 0.008154499344527721, 'timestamp': '2025-09-10 02:20:27.091545', 'step': 2186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:20:27.125598', 'step': 2186, 'epoch': 1} {'type': 'loss', 'content': 0.021896088495850563, 'timestamp': '2025-09-10 02:20:27.139435', 'step': 2187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:27.170364', 'step': 2187, 'epoch': 1} {'type': 'loss', 'content': 0.02253473922610283, 'timestamp': '2025-09-10 02:20:27.198062', 'step': 2188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:27.229184', 'step': 2188, 'epoch': 1} {'type': 'loss', 'content': 0.020850541070103645, 'timestamp': '2025-09-10 02:20:27.231530', 'step': 2189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:27.263642', 'step': 2189, 'epoch': 1} {'type': 'loss', 'content': 0.01595452055335045, 'timestamp': '2025-09-10 02:20:27.271524', 'step': 2190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:20:27.313229', 'step': 2190, 'epoch': 1} {'type': 'loss', 'content': 0.014829051680862904, 'timestamp': '2025-09-10 02:20:27.330363', 'step': 2191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:27.361318', 'step': 2191, 'epoch': 1} {'type': 'loss', 'content': 0.004625355359166861, 'timestamp': '2025-09-10 02:20:27.385169', 'step': 2192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:27.418130', 'step': 2192, 'epoch': 1} {'type': 'loss', 'content': 0.004544160328805447, 'timestamp': '2025-09-10 02:20:27.422481', 'step': 2193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:27.454944', 'step': 2193, 'epoch': 1} {'type': 'loss', 'content': 0.004428909160196781, 'timestamp': '2025-09-10 02:20:27.467282', 'step': 2194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:27.499428', 'step': 2194, 'epoch': 1} {'type': 'loss', 'content': 0.002593469340354204, 'timestamp': '2025-09-10 02:20:27.506749', 'step': 2195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:27.538937', 'step': 2195, 'epoch': 1} {'type': 'loss', 'content': 0.0045441207475960255, 'timestamp': '2025-09-10 02:20:27.570800', 'step': 2196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:27.603970', 'step': 2196, 'epoch': 1} {'type': 'loss', 'content': 0.0163312666118145, 'timestamp': '2025-09-10 02:20:27.606468', 'step': 2197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:27.638454', 'step': 2197, 'epoch': 1} {'type': 'loss', 'content': 0.008555804379284382, 'timestamp': '2025-09-10 02:20:27.641211', 'step': 2198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:20:27.672756', 'step': 2198, 'epoch': 1} {'type': 'loss', 'content': 0.008500153198838234, 'timestamp': '2025-09-10 02:20:27.675218', 'step': 2199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:27.707569', 'step': 2199, 'epoch': 1} {'type': 'loss', 'content': 0.006550853606313467, 'timestamp': '2025-09-10 02:20:27.732976', 'step': 2200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:27.764385', 'step': 2200, 'epoch': 1} {'type': 'loss', 'content': 0.013699221424758434, 'timestamp': '2025-09-10 02:20:27.766855', 'step': 2201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:27.797602', 'step': 2201, 'epoch': 1} {'type': 'loss', 'content': 0.008543393574655056, 'timestamp': '2025-09-10 02:20:27.805319', 'step': 2202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:27.837410', 'step': 2202, 'epoch': 1} {'type': 'loss', 'content': 0.01858612895011902, 'timestamp': '2025-09-10 02:20:27.844392', 'step': 2203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:27.875489', 'step': 2203, 'epoch': 1} {'type': 'loss', 'content': 0.01539852935820818, 'timestamp': '2025-09-10 02:20:27.900629', 'step': 2204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:27.931630', 'step': 2204, 'epoch': 1} {'type': 'loss', 'content': 0.0037464885972440243, 'timestamp': '2025-09-10 02:20:27.936471', 'step': 2205, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:20:38.286390', 'step': 2205, 'epoch': 1} {'type': 'pplx', 'content': 17307864.411721557, 'timestamp': '2025-09-10 02:20:38.289200', 'step': 2205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:38.319658', 'step': 2205, 'epoch': 1} {'type': 'loss', 'content': 0.03866380453109741, 'timestamp': '2025-09-10 02:20:38.330198', 'step': 2206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:38.363594', 'step': 2206, 'epoch': 1} {'type': 'loss', 'content': 0.0060219233855605125, 'timestamp': '2025-09-10 02:20:38.370493', 'step': 2207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:38.402011', 'step': 2207, 'epoch': 1} {'type': 'loss', 'content': 0.01440694835036993, 'timestamp': '2025-09-10 02:20:38.433053', 'step': 2208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:38.465167', 'step': 2208, 'epoch': 1} {'type': 'loss', 'content': 0.01927160657942295, 'timestamp': '2025-09-10 02:20:38.469613', 'step': 2209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:38.500179', 'step': 2209, 'epoch': 1} {'type': 'loss', 'content': 0.011063886806368828, 'timestamp': '2025-09-10 02:20:38.507667', 'step': 2210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:38.538872', 'step': 2210, 'epoch': 1} {'type': 'loss', 'content': 0.01112239807844162, 'timestamp': '2025-09-10 02:20:38.546466', 'step': 2211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:38.578628', 'step': 2211, 'epoch': 1} {'type': 'loss', 'content': 0.007352620828896761, 'timestamp': '2025-09-10 02:20:38.606233', 'step': 2212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:38.638343', 'step': 2212, 'epoch': 1} {'type': 'loss', 'content': 0.004721261560916901, 'timestamp': '2025-09-10 02:20:38.643046', 'step': 2213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:38.673567', 'step': 2213, 'epoch': 1} {'type': 'loss', 'content': 0.0031937905587255955, 'timestamp': '2025-09-10 02:20:38.680945', 'step': 2214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:38.712648', 'step': 2214, 'epoch': 1} {'type': 'loss', 'content': 0.027609581127762794, 'timestamp': '2025-09-10 02:20:38.723411', 'step': 2215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:38.754538', 'step': 2215, 'epoch': 1} {'type': 'loss', 'content': 0.030603447929024696, 'timestamp': '2025-09-10 02:20:38.783175', 'step': 2216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:38.814814', 'step': 2216, 'epoch': 1} {'type': 'loss', 'content': 0.037538252770900726, 'timestamp': '2025-09-10 02:20:38.820166', 'step': 2217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:38.851234', 'step': 2217, 'epoch': 1} {'type': 'loss', 'content': 0.01803760603070259, 'timestamp': '2025-09-10 02:20:38.855760', 'step': 2218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:38.886704', 'step': 2218, 'epoch': 1} {'type': 'loss', 'content': 0.0046121361665427685, 'timestamp': '2025-09-10 02:20:38.893551', 'step': 2219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:38.924971', 'step': 2219, 'epoch': 1} {'type': 'loss', 'content': 0.011201003566384315, 'timestamp': '2025-09-10 02:20:38.957648', 'step': 2220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:38.989597', 'step': 2220, 'epoch': 1} {'type': 'loss', 'content': 0.02293272875249386, 'timestamp': '2025-09-10 02:20:38.994574', 'step': 2221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:39.025660', 'step': 2221, 'epoch': 1} {'type': 'loss', 'content': 0.010559617541730404, 'timestamp': '2025-09-10 02:20:39.033153', 'step': 2222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:39.064015', 'step': 2222, 'epoch': 1} {'type': 'loss', 'content': 0.0019281571730971336, 'timestamp': '2025-09-10 02:20:39.070877', 'step': 2223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:39.102032', 'step': 2223, 'epoch': 1} {'type': 'loss', 'content': 0.0065590995363891125, 'timestamp': '2025-09-10 02:20:39.134526', 'step': 2224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:39.166870', 'step': 2224, 'epoch': 1} {'type': 'loss', 'content': 0.0038079393561929464, 'timestamp': '2025-09-10 02:20:39.174362', 'step': 2225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:39.207567', 'step': 2225, 'epoch': 1} {'type': 'loss', 'content': 0.015453227795660496, 'timestamp': '2025-09-10 02:20:39.219510', 'step': 2226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:39.250961', 'step': 2226, 'epoch': 1} {'type': 'loss', 'content': 0.004905925132334232, 'timestamp': '2025-09-10 02:20:39.255352', 'step': 2227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:39.286245', 'step': 2227, 'epoch': 1} {'type': 'loss', 'content': 0.001167826121672988, 'timestamp': '2025-09-10 02:20:39.314110', 'step': 2228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:20:39.344406', 'step': 2228, 'epoch': 1} {'type': 'loss', 'content': 0.013551952317357063, 'timestamp': '2025-09-10 02:20:39.346565', 'step': 2229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:39.377987', 'step': 2229, 'epoch': 1} {'type': 'loss', 'content': 0.005024695303291082, 'timestamp': '2025-09-10 02:20:39.384822', 'step': 2230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:20:39.427690', 'step': 2230, 'epoch': 1} {'type': 'loss', 'content': 0.025890955701470375, 'timestamp': '2025-09-10 02:20:39.445135', 'step': 2231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:39.479800', 'step': 2231, 'epoch': 1} {'type': 'loss', 'content': 0.005515389610081911, 'timestamp': '2025-09-10 02:20:39.511039', 'step': 2232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:39.542422', 'step': 2232, 'epoch': 1} {'type': 'loss', 'content': 0.015928935259580612, 'timestamp': '2025-09-10 02:20:39.547540', 'step': 2233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:39.580999', 'step': 2233, 'epoch': 1} {'type': 'loss', 'content': 0.0011972986394539475, 'timestamp': '2025-09-10 02:20:39.588232', 'step': 2234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:39.622143', 'step': 2234, 'epoch': 1} {'type': 'loss', 'content': 0.00578897912055254, 'timestamp': '2025-09-10 02:20:39.625930', 'step': 2235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:20:39.664400', 'step': 2235, 'epoch': 1} {'type': 'loss', 'content': 0.0018228079425171018, 'timestamp': '2025-09-10 02:20:39.701132', 'step': 2236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:39.731827', 'step': 2236, 'epoch': 1} {'type': 'loss', 'content': 0.0009967860532924533, 'timestamp': '2025-09-10 02:20:39.734081', 'step': 2237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:39.764834', 'step': 2237, 'epoch': 1} {'type': 'loss', 'content': 0.027059337124228477, 'timestamp': '2025-09-10 02:20:39.769194', 'step': 2238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:39.800765', 'step': 2238, 'epoch': 1} {'type': 'loss', 'content': 0.02666650339961052, 'timestamp': '2025-09-10 02:20:39.808311', 'step': 2239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:39.840159', 'step': 2239, 'epoch': 1} {'type': 'loss', 'content': 0.024572152644395828, 'timestamp': '2025-09-10 02:20:39.871790', 'step': 2240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:39.903716', 'step': 2240, 'epoch': 1} {'type': 'loss', 'content': 0.006256606429815292, 'timestamp': '2025-09-10 02:20:39.908708', 'step': 2241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:39.943990', 'step': 2241, 'epoch': 1} {'type': 'loss', 'content': 0.009752501733601093, 'timestamp': '2025-09-10 02:20:39.957370', 'step': 2242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:20:39.988230', 'step': 2242, 'epoch': 1} {'type': 'loss', 'content': 0.0497528612613678, 'timestamp': '2025-09-10 02:20:39.990533', 'step': 2243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:40.022012', 'step': 2243, 'epoch': 1} {'type': 'loss', 'content': 0.010288014076650143, 'timestamp': '2025-09-10 02:20:40.049924', 'step': 2244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:40.082626', 'step': 2244, 'epoch': 1} {'type': 'loss', 'content': 0.004436141811311245, 'timestamp': '2025-09-10 02:20:40.087296', 'step': 2245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 14712978242368}, 'timestamp': '2025-09-10 02:20:40.129627', 'step': 2245, 'epoch': 1} {'type': 'loss', 'content': 0.016879761591553688, 'timestamp': '2025-09-10 02:20:40.147151', 'step': 2246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:40.179232', 'step': 2246, 'epoch': 1} {'type': 'loss', 'content': 0.005393616855144501, 'timestamp': '2025-09-10 02:20:40.189267', 'step': 2247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:40.220928', 'step': 2247, 'epoch': 1} {'type': 'loss', 'content': 0.006263823714107275, 'timestamp': '2025-09-10 02:20:40.251889', 'step': 2248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:40.284453', 'step': 2248, 'epoch': 1} {'type': 'loss', 'content': 0.0019508127588778734, 'timestamp': '2025-09-10 02:20:40.288974', 'step': 2249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:20:40.321075', 'step': 2249, 'epoch': 1} {'type': 'loss', 'content': 0.016456058248877525, 'timestamp': '2025-09-10 02:20:40.323232', 'step': 2250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:40.354189', 'step': 2250, 'epoch': 1} {'type': 'loss', 'content': 0.0015526397619396448, 'timestamp': '2025-09-10 02:20:40.361701', 'step': 2251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:20:40.401026', 'step': 2251, 'epoch': 1} {'type': 'loss', 'content': 0.002204819582402706, 'timestamp': '2025-09-10 02:20:40.437770', 'step': 2252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:40.469534', 'step': 2252, 'epoch': 1} {'type': 'loss', 'content': 0.0038875446189194918, 'timestamp': '2025-09-10 02:20:40.477752', 'step': 2253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:40.509005', 'step': 2253, 'epoch': 1} {'type': 'loss', 'content': 0.003834874602034688, 'timestamp': '2025-09-10 02:20:40.515829', 'step': 2254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:40.546182', 'step': 2254, 'epoch': 1} {'type': 'loss', 'content': 0.007351601030677557, 'timestamp': '2025-09-10 02:20:40.553690', 'step': 2255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:40.585632', 'step': 2255, 'epoch': 1} {'type': 'loss', 'content': 0.018372252583503723, 'timestamp': '2025-09-10 02:20:40.614197', 'step': 2256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:20:40.647153', 'step': 2256, 'epoch': 1} {'type': 'loss', 'content': 0.013634584844112396, 'timestamp': '2025-09-10 02:20:40.660245', 'step': 2257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:40.691001', 'step': 2257, 'epoch': 1} {'type': 'loss', 'content': 0.008646705187857151, 'timestamp': '2025-09-10 02:20:40.697964', 'step': 2258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:40.729949', 'step': 2258, 'epoch': 1} {'type': 'loss', 'content': 0.008055547252297401, 'timestamp': '2025-09-10 02:20:40.737425', 'step': 2259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:40.769504', 'step': 2259, 'epoch': 1} {'type': 'loss', 'content': 0.019878502935171127, 'timestamp': '2025-09-10 02:20:40.796993', 'step': 2260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:40.830990', 'step': 2260, 'epoch': 1} {'type': 'loss', 'content': 0.007829791866242886, 'timestamp': '2025-09-10 02:20:40.837243', 'step': 2261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:40.868790', 'step': 2261, 'epoch': 1} {'type': 'loss', 'content': 0.01760595664381981, 'timestamp': '2025-09-10 02:20:40.872455', 'step': 2262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:20:40.910484', 'step': 2262, 'epoch': 1} {'type': 'loss', 'content': 0.0019513925071805716, 'timestamp': '2025-09-10 02:20:40.926123', 'step': 2263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:40.957429', 'step': 2263, 'epoch': 1} {'type': 'loss', 'content': 0.0019971609581261873, 'timestamp': '2025-09-10 02:20:40.985426', 'step': 2264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:41.016592', 'step': 2264, 'epoch': 1} {'type': 'loss', 'content': 0.006590723525732756, 'timestamp': '2025-09-10 02:20:41.024702', 'step': 2265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:41.058410', 'step': 2265, 'epoch': 1} {'type': 'loss', 'content': 0.0263630710542202, 'timestamp': '2025-09-10 02:20:41.071772', 'step': 2266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:41.103787', 'step': 2266, 'epoch': 1} {'type': 'loss', 'content': 0.003090274054557085, 'timestamp': '2025-09-10 02:20:41.115712', 'step': 2267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:41.146821', 'step': 2267, 'epoch': 1} {'type': 'loss', 'content': 0.01568448916077614, 'timestamp': '2025-09-10 02:20:41.174954', 'step': 2268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:41.206104', 'step': 2268, 'epoch': 1} {'type': 'loss', 'content': 0.0363653302192688, 'timestamp': '2025-09-10 02:20:41.215843', 'step': 2269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:41.247801', 'step': 2269, 'epoch': 1} {'type': 'loss', 'content': 0.014266527257859707, 'timestamp': '2025-09-10 02:20:41.254956', 'step': 2270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:41.288532', 'step': 2270, 'epoch': 1} {'type': 'loss', 'content': 0.0015003466978669167, 'timestamp': '2025-09-10 02:20:41.301902', 'step': 2271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:41.333348', 'step': 2271, 'epoch': 1} {'type': 'loss', 'content': 0.0023136555682867765, 'timestamp': '2025-09-10 02:20:41.366712', 'step': 2272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:41.400192', 'step': 2272, 'epoch': 1} {'type': 'loss', 'content': 0.021259073168039322, 'timestamp': '2025-09-10 02:20:41.404442', 'step': 2273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:20:41.436347', 'step': 2273, 'epoch': 1} {'type': 'loss', 'content': 0.0009670493309386075, 'timestamp': '2025-09-10 02:20:41.438630', 'step': 2274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:41.469990', 'step': 2274, 'epoch': 1} {'type': 'loss', 'content': 0.02005203254520893, 'timestamp': '2025-09-10 02:20:41.472485', 'step': 2275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:41.504586', 'step': 2275, 'epoch': 1} {'type': 'loss', 'content': 0.0043064602650702, 'timestamp': '2025-09-10 02:20:41.532548', 'step': 2276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:41.563391', 'step': 2276, 'epoch': 1} {'type': 'loss', 'content': 0.01462549064308405, 'timestamp': '2025-09-10 02:20:41.568013', 'step': 2277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:41.599710', 'step': 2277, 'epoch': 1} {'type': 'loss', 'content': 0.01992652751505375, 'timestamp': '2025-09-10 02:20:41.606950', 'step': 2278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:41.638093', 'step': 2278, 'epoch': 1} {'type': 'loss', 'content': 0.0108196334913373, 'timestamp': '2025-09-10 02:20:41.645374', 'step': 2279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:41.676660', 'step': 2279, 'epoch': 1} {'type': 'loss', 'content': 0.001470404677093029, 'timestamp': '2025-09-10 02:20:41.707742', 'step': 2280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:41.737903', 'step': 2280, 'epoch': 1} {'type': 'loss', 'content': 0.004251373466104269, 'timestamp': '2025-09-10 02:20:41.742522', 'step': 2281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:20:41.777090', 'step': 2281, 'epoch': 1} {'type': 'loss', 'content': 0.009216892533004284, 'timestamp': '2025-09-10 02:20:41.791186', 'step': 2282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:20:41.826307', 'step': 2282, 'epoch': 1} {'type': 'loss', 'content': 0.020200418308377266, 'timestamp': '2025-09-10 02:20:41.840250', 'step': 2283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:41.870676', 'step': 2283, 'epoch': 1} {'type': 'loss', 'content': 0.0011728627141565084, 'timestamp': '2025-09-10 02:20:41.895569', 'step': 2284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:20:41.927789', 'step': 2284, 'epoch': 1} {'type': 'loss', 'content': 0.004072748590260744, 'timestamp': '2025-09-10 02:20:41.940922', 'step': 2285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:41.971763', 'step': 2285, 'epoch': 1} {'type': 'loss', 'content': 0.03455633297562599, 'timestamp': '2025-09-10 02:20:41.978289', 'step': 2286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:42.012708', 'step': 2286, 'epoch': 1} {'type': 'loss', 'content': 0.01204759068787098, 'timestamp': '2025-09-10 02:20:42.020129', 'step': 2287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:42.051484', 'step': 2287, 'epoch': 1} {'type': 'loss', 'content': 0.0008365390822291374, 'timestamp': '2025-09-10 02:20:42.076075', 'step': 2288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:42.108564', 'step': 2288, 'epoch': 1} {'type': 'loss', 'content': 0.04001276567578316, 'timestamp': '2025-09-10 02:20:42.112886', 'step': 2289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:42.143380', 'step': 2289, 'epoch': 1} {'type': 'loss', 'content': 0.007685269229114056, 'timestamp': '2025-09-10 02:20:42.147494', 'step': 2290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:42.178433', 'step': 2290, 'epoch': 1} {'type': 'loss', 'content': 0.0020888035651296377, 'timestamp': '2025-09-10 02:20:42.191101', 'step': 2291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:42.222633', 'step': 2291, 'epoch': 1} {'type': 'loss', 'content': 0.0040938640013337135, 'timestamp': '2025-09-10 02:20:42.250470', 'step': 2292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:42.281635', 'step': 2292, 'epoch': 1} {'type': 'loss', 'content': 0.001731925061903894, 'timestamp': '2025-09-10 02:20:42.286685', 'step': 2293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:42.317261', 'step': 2293, 'epoch': 1} {'type': 'loss', 'content': 0.020662318915128708, 'timestamp': '2025-09-10 02:20:42.327342', 'step': 2294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:42.358388', 'step': 2294, 'epoch': 1} {'type': 'loss', 'content': 0.0022532641887664795, 'timestamp': '2025-09-10 02:20:42.368483', 'step': 2295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:42.398792', 'step': 2295, 'epoch': 1} {'type': 'loss', 'content': 0.0038333218544721603, 'timestamp': '2025-09-10 02:20:42.424245', 'step': 2296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:20:42.455186', 'step': 2296, 'epoch': 1} {'type': 'loss', 'content': 0.014495732262730598, 'timestamp': '2025-09-10 02:20:42.457421', 'step': 2297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:42.488427', 'step': 2297, 'epoch': 1} {'type': 'loss', 'content': 0.017322422936558723, 'timestamp': '2025-09-10 02:20:42.495776', 'step': 2298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:42.526511', 'step': 2298, 'epoch': 1} {'type': 'loss', 'content': 0.0031519909389317036, 'timestamp': '2025-09-10 02:20:42.530996', 'step': 2299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:42.562167', 'step': 2299, 'epoch': 1} {'type': 'loss', 'content': 0.0060340953059494495, 'timestamp': '2025-09-10 02:20:42.593565', 'step': 2300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:42.624703', 'step': 2300, 'epoch': 1} {'type': 'loss', 'content': 0.0017768697580322623, 'timestamp': '2025-09-10 02:20:42.626635', 'step': 2301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:42.657184', 'step': 2301, 'epoch': 1} {'type': 'loss', 'content': 0.002710967091843486, 'timestamp': '2025-09-10 02:20:42.661251', 'step': 2302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:42.692441', 'step': 2302, 'epoch': 1} {'type': 'loss', 'content': 0.002997096860781312, 'timestamp': '2025-09-10 02:20:42.703313', 'step': 2303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:42.737922', 'step': 2303, 'epoch': 1} {'type': 'loss', 'content': 0.0036649901885539293, 'timestamp': '2025-09-10 02:20:42.772214', 'step': 2304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:42.802806', 'step': 2304, 'epoch': 1} {'type': 'loss', 'content': 0.01135755330324173, 'timestamp': '2025-09-10 02:20:42.807834', 'step': 2305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:20:42.846726', 'step': 2305, 'epoch': 1} {'type': 'loss', 'content': 0.005223255138844252, 'timestamp': '2025-09-10 02:20:42.862333', 'step': 2306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:42.894696', 'step': 2306, 'epoch': 1} {'type': 'loss', 'content': 0.007239340338855982, 'timestamp': '2025-09-10 02:20:42.902033', 'step': 2307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:20:42.944257', 'step': 2307, 'epoch': 1} {'type': 'loss', 'content': 0.017995523288846016, 'timestamp': '2025-09-10 02:20:42.982439', 'step': 2308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:43.013889', 'step': 2308, 'epoch': 1} {'type': 'loss', 'content': 0.008536996319890022, 'timestamp': '2025-09-10 02:20:43.018208', 'step': 2309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:43.048443', 'step': 2309, 'epoch': 1} {'type': 'loss', 'content': 0.028488274663686752, 'timestamp': '2025-09-10 02:20:43.052782', 'step': 2310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:43.083593', 'step': 2310, 'epoch': 1} {'type': 'loss', 'content': 0.04561712220311165, 'timestamp': '2025-09-10 02:20:43.088056', 'step': 2311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:43.119831', 'step': 2311, 'epoch': 1} {'type': 'loss', 'content': 0.012657607905566692, 'timestamp': '2025-09-10 02:20:43.147693', 'step': 2312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:43.177914', 'step': 2312, 'epoch': 1} {'type': 'loss', 'content': 0.010057737119495869, 'timestamp': '2025-09-10 02:20:43.182495', 'step': 2313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:43.215199', 'step': 2313, 'epoch': 1} {'type': 'loss', 'content': 0.019854340702295303, 'timestamp': '2025-09-10 02:20:43.226193', 'step': 2314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:43.259025', 'step': 2314, 'epoch': 1} {'type': 'loss', 'content': 0.014225935563445091, 'timestamp': '2025-09-10 02:20:43.269294', 'step': 2315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:43.303944', 'step': 2315, 'epoch': 1} {'type': 'loss', 'content': 0.0027752441819757223, 'timestamp': '2025-09-10 02:20:43.331839', 'step': 2316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:20:43.362643', 'step': 2316, 'epoch': 1} {'type': 'loss', 'content': 0.006470884662121534, 'timestamp': '2025-09-10 02:20:43.365156', 'step': 2317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:43.396458', 'step': 2317, 'epoch': 1} {'type': 'loss', 'content': 0.00640989001840353, 'timestamp': '2025-09-10 02:20:43.400569', 'step': 2318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:43.432483', 'step': 2318, 'epoch': 1} {'type': 'loss', 'content': 0.01300732046365738, 'timestamp': '2025-09-10 02:20:43.445053', 'step': 2319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:43.475866', 'step': 2319, 'epoch': 1} {'type': 'loss', 'content': 0.005316443741321564, 'timestamp': '2025-09-10 02:20:43.499172', 'step': 2320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:43.532720', 'step': 2320, 'epoch': 1} {'type': 'loss', 'content': 0.014593763276934624, 'timestamp': '2025-09-10 02:20:43.536478', 'step': 2321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:20:43.570893', 'step': 2321, 'epoch': 1} {'type': 'loss', 'content': 0.02588742971420288, 'timestamp': '2025-09-10 02:20:43.584597', 'step': 2322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:20:43.620409', 'step': 2322, 'epoch': 1} {'type': 'loss', 'content': 0.002280434826388955, 'timestamp': '2025-09-10 02:20:43.634093', 'step': 2323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:43.665408', 'step': 2323, 'epoch': 1} {'type': 'loss', 'content': 0.011547980830073357, 'timestamp': '2025-09-10 02:20:43.692821', 'step': 2324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:43.725615', 'step': 2324, 'epoch': 1} {'type': 'loss', 'content': 0.019922899082303047, 'timestamp': '2025-09-10 02:20:43.733524', 'step': 2325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:43.764663', 'step': 2325, 'epoch': 1} {'type': 'loss', 'content': 0.0015016960678622127, 'timestamp': '2025-09-10 02:20:43.774636', 'step': 2326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:20:43.809550', 'step': 2326, 'epoch': 1} {'type': 'loss', 'content': 0.019193019717931747, 'timestamp': '2025-09-10 02:20:43.823553', 'step': 2327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:20:43.864804', 'step': 2327, 'epoch': 1} {'type': 'loss', 'content': 0.009289233013987541, 'timestamp': '2025-09-10 02:20:43.902812', 'step': 2328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:43.937387', 'step': 2328, 'epoch': 1} {'type': 'loss', 'content': 0.0029966922011226416, 'timestamp': '2025-09-10 02:20:43.939136', 'step': 2329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:43.969998', 'step': 2329, 'epoch': 1} {'type': 'loss', 'content': 0.011959855444729328, 'timestamp': '2025-09-10 02:20:43.974267', 'step': 2330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:44.008145', 'step': 2330, 'epoch': 1} {'type': 'loss', 'content': 0.0025715772062540054, 'timestamp': '2025-09-10 02:20:44.021546', 'step': 2331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:44.053744', 'step': 2331, 'epoch': 1} {'type': 'loss', 'content': 0.007740771863609552, 'timestamp': '2025-09-10 02:20:44.083824', 'step': 2332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:44.116498', 'step': 2332, 'epoch': 1} {'type': 'loss', 'content': 0.008812974207103252, 'timestamp': '2025-09-10 02:20:44.120577', 'step': 2333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:44.153115', 'step': 2333, 'epoch': 1} {'type': 'loss', 'content': 0.009404649026691914, 'timestamp': '2025-09-10 02:20:44.164292', 'step': 2334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:44.195361', 'step': 2334, 'epoch': 1} {'type': 'loss', 'content': 0.002871322212740779, 'timestamp': '2025-09-10 02:20:44.199476', 'step': 2335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:44.231014', 'step': 2335, 'epoch': 1} {'type': 'loss', 'content': 0.00894598476588726, 'timestamp': '2025-09-10 02:20:44.259219', 'step': 2336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:44.290134', 'step': 2336, 'epoch': 1} {'type': 'loss', 'content': 0.0016351427184417844, 'timestamp': '2025-09-10 02:20:44.294752', 'step': 2337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:20:44.336635', 'step': 2337, 'epoch': 1} {'type': 'loss', 'content': 0.03164186701178551, 'timestamp': '2025-09-10 02:20:44.353688', 'step': 2338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:44.385870', 'step': 2338, 'epoch': 1} {'type': 'loss', 'content': 0.002591783879324794, 'timestamp': '2025-09-10 02:20:44.392350', 'step': 2339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:44.423985', 'step': 2339, 'epoch': 1} {'type': 'loss', 'content': 0.006208306644111872, 'timestamp': '2025-09-10 02:20:44.452230', 'step': 2340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:44.483661', 'step': 2340, 'epoch': 1} {'type': 'loss', 'content': 0.034006860107183456, 'timestamp': '2025-09-10 02:20:44.496386', 'step': 2341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 13289167064320}, 'timestamp': '2025-09-10 02:20:44.536358', 'step': 2341, 'epoch': 1} {'type': 'loss', 'content': 0.005256416741758585, 'timestamp': '2025-09-10 02:20:44.552804', 'step': 2342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:44.584822', 'step': 2342, 'epoch': 1} {'type': 'loss', 'content': 0.009249741211533546, 'timestamp': '2025-09-10 02:20:44.591960', 'step': 2343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:44.622800', 'step': 2343, 'epoch': 1} {'type': 'loss', 'content': 0.007146596908569336, 'timestamp': '2025-09-10 02:20:44.651295', 'step': 2344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:44.682659', 'step': 2344, 'epoch': 1} {'type': 'loss', 'content': 0.0049681952223181725, 'timestamp': '2025-09-10 02:20:44.688005', 'step': 2345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:44.718988', 'step': 2345, 'epoch': 1} {'type': 'loss', 'content': 0.018689943477511406, 'timestamp': '2025-09-10 02:20:44.726369', 'step': 2346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:44.757158', 'step': 2346, 'epoch': 1} {'type': 'loss', 'content': 0.022296303883194923, 'timestamp': '2025-09-10 02:20:44.764741', 'step': 2347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:44.796511', 'step': 2347, 'epoch': 1} {'type': 'loss', 'content': 0.0007350871455855668, 'timestamp': '2025-09-10 02:20:44.828415', 'step': 2348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:44.859942', 'step': 2348, 'epoch': 1} {'type': 'loss', 'content': 0.0011058534728363156, 'timestamp': '2025-09-10 02:20:44.862115', 'step': 2349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:20:44.896598', 'step': 2349, 'epoch': 1} {'type': 'loss', 'content': 0.027341008186340332, 'timestamp': '2025-09-10 02:20:44.910312', 'step': 2350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:44.942224', 'step': 2350, 'epoch': 1} {'type': 'loss', 'content': 0.007282934617251158, 'timestamp': '2025-09-10 02:20:44.949678', 'step': 2351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 19459015502528}, 'timestamp': '2025-09-10 02:20:45.004125', 'step': 2351, 'epoch': 1} {'type': 'loss', 'content': 0.00618086289614439, 'timestamp': '2025-09-10 02:20:45.048452', 'step': 2352, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:20:55.262898', 'step': 2352, 'epoch': 1} {'type': 'pplx', 'content': 18824440.007556766, 'timestamp': '2025-09-10 02:20:55.266064', 'step': 2352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:55.295642', 'step': 2352, 'epoch': 1} {'type': 'loss', 'content': 0.012922325171530247, 'timestamp': '2025-09-10 02:20:55.302810', 'step': 2353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:55.333710', 'step': 2353, 'epoch': 1} {'type': 'loss', 'content': 0.0021893230732530355, 'timestamp': '2025-09-10 02:20:55.341004', 'step': 2354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:55.371723', 'step': 2354, 'epoch': 1} {'type': 'loss', 'content': 0.0217142216861248, 'timestamp': '2025-09-10 02:20:55.375719', 'step': 2355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 15662185694400}, 'timestamp': '2025-09-10 02:20:55.422689', 'step': 2355, 'epoch': 1} {'type': 'loss', 'content': 0.0012116653379052877, 'timestamp': '2025-09-10 02:20:55.462794', 'step': 2356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [3, 224], 'flops': 4983601869792}, 'timestamp': '2025-09-10 02:20:55.515007', 'step': 2356, 'epoch': 1} {'type': 'loss', 'content': 0.0030051611829549074, 'timestamp': '2025-09-10 02:20:55.517186', 'step': 2357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:55.565649', 'step': 2357, 'epoch': 2} {'type': 'loss', 'content': 0.001868675695732236, 'timestamp': '2025-09-10 02:20:55.570508', 'step': 2358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:55.601669', 'step': 2358, 'epoch': 2} {'type': 'loss', 'content': 0.006056786980479956, 'timestamp': '2025-09-10 02:20:55.609223', 'step': 2359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:55.640144', 'step': 2359, 'epoch': 2} {'type': 'loss', 'content': 0.01372221577912569, 'timestamp': '2025-09-10 02:20:55.668003', 'step': 2360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:55.698857', 'step': 2360, 'epoch': 2} {'type': 'loss', 'content': 0.0012454865500330925, 'timestamp': '2025-09-10 02:20:55.703516', 'step': 2361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:55.734399', 'step': 2361, 'epoch': 2} {'type': 'loss', 'content': 0.0016425231005996466, 'timestamp': '2025-09-10 02:20:55.741488', 'step': 2362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:55.771791', 'step': 2362, 'epoch': 2} {'type': 'loss', 'content': 0.011411800980567932, 'timestamp': '2025-09-10 02:20:55.775839', 'step': 2363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:55.807165', 'step': 2363, 'epoch': 2} {'type': 'loss', 'content': 0.035575442016124725, 'timestamp': '2025-09-10 02:20:55.835049', 'step': 2364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:55.865716', 'step': 2364, 'epoch': 2} {'type': 'loss', 'content': 0.028888003900647163, 'timestamp': '2025-09-10 02:20:55.870861', 'step': 2365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:55.902237', 'step': 2365, 'epoch': 2} {'type': 'loss', 'content': 0.013811533339321613, 'timestamp': '2025-09-10 02:20:55.912491', 'step': 2366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:55.943287', 'step': 2366, 'epoch': 2} {'type': 'loss', 'content': 0.001658704481087625, 'timestamp': '2025-09-10 02:20:55.951124', 'step': 2367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:55.982180', 'step': 2367, 'epoch': 2} {'type': 'loss', 'content': 0.007761240005493164, 'timestamp': '2025-09-10 02:20:56.009977', 'step': 2368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:56.041483', 'step': 2368, 'epoch': 2} {'type': 'loss', 'content': 0.019760314375162125, 'timestamp': '2025-09-10 02:20:56.045864', 'step': 2369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:56.076662', 'step': 2369, 'epoch': 2} {'type': 'loss', 'content': 0.0020639884751290083, 'timestamp': '2025-09-10 02:20:56.081232', 'step': 2370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:56.114724', 'step': 2370, 'epoch': 2} {'type': 'loss', 'content': 0.0026808734983205795, 'timestamp': '2025-09-10 02:20:56.128079', 'step': 2371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:56.159084', 'step': 2371, 'epoch': 2} {'type': 'loss', 'content': 0.0017675552517175674, 'timestamp': '2025-09-10 02:20:56.184457', 'step': 2372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:56.215102', 'step': 2372, 'epoch': 2} {'type': 'loss', 'content': 0.0011209690710529685, 'timestamp': '2025-09-10 02:20:56.217329', 'step': 2373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:56.248801', 'step': 2373, 'epoch': 2} {'type': 'loss', 'content': 0.01740814931690693, 'timestamp': '2025-09-10 02:20:56.261180', 'step': 2374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:56.293507', 'step': 2374, 'epoch': 2} {'type': 'loss', 'content': 0.014776641502976418, 'timestamp': '2025-09-10 02:20:56.298010', 'step': 2375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:56.329056', 'step': 2375, 'epoch': 2} {'type': 'loss', 'content': 0.006744783371686935, 'timestamp': '2025-09-10 02:20:56.360277', 'step': 2376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:56.391300', 'step': 2376, 'epoch': 2} {'type': 'loss', 'content': 0.004561256151646376, 'timestamp': '2025-09-10 02:20:56.393677', 'step': 2377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:56.425799', 'step': 2377, 'epoch': 2} {'type': 'loss', 'content': 0.02400498278439045, 'timestamp': '2025-09-10 02:20:56.435986', 'step': 2378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:56.467356', 'step': 2378, 'epoch': 2} {'type': 'loss', 'content': 0.0026017860509455204, 'timestamp': '2025-09-10 02:20:56.471485', 'step': 2379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:56.502242', 'step': 2379, 'epoch': 2} {'type': 'loss', 'content': 0.0014477769145742059, 'timestamp': '2025-09-10 02:20:56.530026', 'step': 2380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:56.562359', 'step': 2380, 'epoch': 2} {'type': 'loss', 'content': 0.0038361712358891964, 'timestamp': '2025-09-10 02:20:56.567242', 'step': 2381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:56.599463', 'step': 2381, 'epoch': 2} {'type': 'loss', 'content': 0.015629353001713753, 'timestamp': '2025-09-10 02:20:56.606433', 'step': 2382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:56.639786', 'step': 2382, 'epoch': 2} {'type': 'loss', 'content': 0.0013419255847111344, 'timestamp': '2025-09-10 02:20:56.647012', 'step': 2383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:56.677425', 'step': 2383, 'epoch': 2} {'type': 'loss', 'content': 0.012586474418640137, 'timestamp': '2025-09-10 02:20:56.705245', 'step': 2384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:56.736712', 'step': 2384, 'epoch': 2} {'type': 'loss', 'content': 0.011415432207286358, 'timestamp': '2025-09-10 02:20:56.739923', 'step': 2385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:56.773942', 'step': 2385, 'epoch': 2} {'type': 'loss', 'content': 0.03539576753973961, 'timestamp': '2025-09-10 02:20:56.781014', 'step': 2386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:20:56.815401', 'step': 2386, 'epoch': 2} {'type': 'loss', 'content': 0.0012404642766341567, 'timestamp': '2025-09-10 02:20:56.829064', 'step': 2387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:56.860093', 'step': 2387, 'epoch': 2} {'type': 'loss', 'content': 0.0017915985081344843, 'timestamp': '2025-09-10 02:20:56.887618', 'step': 2388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:56.919137', 'step': 2388, 'epoch': 2} {'type': 'loss', 'content': 0.01956382766366005, 'timestamp': '2025-09-10 02:20:56.923666', 'step': 2389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:56.955340', 'step': 2389, 'epoch': 2} {'type': 'loss', 'content': 0.012075236067175865, 'timestamp': '2025-09-10 02:20:56.959135', 'step': 2390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:56.990995', 'step': 2390, 'epoch': 2} {'type': 'loss', 'content': 0.015462463721632957, 'timestamp': '2025-09-10 02:20:56.998684', 'step': 2391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:57.029491', 'step': 2391, 'epoch': 2} {'type': 'loss', 'content': 0.03371422737836838, 'timestamp': '2025-09-10 02:20:57.060357', 'step': 2392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:57.092148', 'step': 2392, 'epoch': 2} {'type': 'loss', 'content': 0.00984213501214981, 'timestamp': '2025-09-10 02:20:57.094323', 'step': 2393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:57.137137', 'step': 2393, 'epoch': 2} {'type': 'loss', 'content': 0.0032658951822668314, 'timestamp': '2025-09-10 02:20:57.143803', 'step': 2394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:20:57.176805', 'step': 2394, 'epoch': 2} {'type': 'loss', 'content': 0.00632342416793108, 'timestamp': '2025-09-10 02:20:57.180620', 'step': 2395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:57.211466', 'step': 2395, 'epoch': 2} {'type': 'loss', 'content': 0.027958959341049194, 'timestamp': '2025-09-10 02:20:57.236629', 'step': 2396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:57.268322', 'step': 2396, 'epoch': 2} {'type': 'loss', 'content': 0.007394559681415558, 'timestamp': '2025-09-10 02:20:57.272723', 'step': 2397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:57.305596', 'step': 2397, 'epoch': 2} {'type': 'loss', 'content': 0.04190784692764282, 'timestamp': '2025-09-10 02:20:57.312749', 'step': 2398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:57.344112', 'step': 2398, 'epoch': 2} {'type': 'loss', 'content': 0.006203221622854471, 'timestamp': '2025-09-10 02:20:57.348174', 'step': 2399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:57.381966', 'step': 2399, 'epoch': 2} {'type': 'loss', 'content': 0.003813191084191203, 'timestamp': '2025-09-10 02:20:57.416214', 'step': 2400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:57.449093', 'step': 2400, 'epoch': 2} {'type': 'loss', 'content': 0.0005948929465375841, 'timestamp': '2025-09-10 02:20:57.453635', 'step': 2401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:57.484630', 'step': 2401, 'epoch': 2} {'type': 'loss', 'content': 0.04219771549105644, 'timestamp': '2025-09-10 02:20:57.488654', 'step': 2402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:57.519922', 'step': 2402, 'epoch': 2} {'type': 'loss', 'content': 0.006709757260978222, 'timestamp': '2025-09-10 02:20:57.526953', 'step': 2403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:57.557872', 'step': 2403, 'epoch': 2} {'type': 'loss', 'content': 0.0066467165015637875, 'timestamp': '2025-09-10 02:20:57.588893', 'step': 2404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:57.620246', 'step': 2404, 'epoch': 2} {'type': 'loss', 'content': 0.0014069050084799528, 'timestamp': '2025-09-10 02:20:57.624577', 'step': 2405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:57.656236', 'step': 2405, 'epoch': 2} {'type': 'loss', 'content': 0.022929934784770012, 'timestamp': '2025-09-10 02:20:57.666779', 'step': 2406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:57.698496', 'step': 2406, 'epoch': 2} {'type': 'loss', 'content': 0.03234897926449776, 'timestamp': '2025-09-10 02:20:57.709451', 'step': 2407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:20:57.742453', 'step': 2407, 'epoch': 2} {'type': 'loss', 'content': 0.0065283398143947124, 'timestamp': '2025-09-10 02:20:57.766252', 'step': 2408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 928], 'flops': 27527278844800}, 'timestamp': '2025-09-10 02:20:57.838691', 'step': 2408, 'epoch': 2} {'type': 'loss', 'content': 0.014821560122072697, 'timestamp': '2025-09-10 02:20:57.870500', 'step': 2409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:57.904843', 'step': 2409, 'epoch': 2} {'type': 'loss', 'content': 0.003778102109208703, 'timestamp': '2025-09-10 02:20:57.909324', 'step': 2410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:57.943833', 'step': 2410, 'epoch': 2} {'type': 'loss', 'content': 0.012805613689124584, 'timestamp': '2025-09-10 02:20:57.954035', 'step': 2411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:57.985528', 'step': 2411, 'epoch': 2} {'type': 'loss', 'content': 0.007993345148861408, 'timestamp': '2025-09-10 02:20:58.013838', 'step': 2412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:20:58.050742', 'step': 2412, 'epoch': 2} {'type': 'loss', 'content': 0.006639838218688965, 'timestamp': '2025-09-10 02:20:58.065968', 'step': 2413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:20:58.096623', 'step': 2413, 'epoch': 2} {'type': 'loss', 'content': 0.00533502921462059, 'timestamp': '2025-09-10 02:20:58.099109', 'step': 2414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:58.129457', 'step': 2414, 'epoch': 2} {'type': 'loss', 'content': 0.02578054927289486, 'timestamp': '2025-09-10 02:20:58.136534', 'step': 2415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:58.167819', 'step': 2415, 'epoch': 2} {'type': 'loss', 'content': 0.007064398843795061, 'timestamp': '2025-09-10 02:20:58.198746', 'step': 2416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:58.228972', 'step': 2416, 'epoch': 2} {'type': 'loss', 'content': 0.01666208915412426, 'timestamp': '2025-09-10 02:20:58.234269', 'step': 2417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:58.266396', 'step': 2417, 'epoch': 2} {'type': 'loss', 'content': 0.036728501319885254, 'timestamp': '2025-09-10 02:20:58.278539', 'step': 2418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:58.314188', 'step': 2418, 'epoch': 2} {'type': 'loss', 'content': 0.011314035393297672, 'timestamp': '2025-09-10 02:20:58.320791', 'step': 2419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:58.352422', 'step': 2419, 'epoch': 2} {'type': 'loss', 'content': 0.004533576779067516, 'timestamp': '2025-09-10 02:20:58.380797', 'step': 2420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:58.412606', 'step': 2420, 'epoch': 2} {'type': 'loss', 'content': 0.005247695837169886, 'timestamp': '2025-09-10 02:20:58.417012', 'step': 2421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:20:58.456744', 'step': 2421, 'epoch': 2} {'type': 'loss', 'content': 0.019029613584280014, 'timestamp': '2025-09-10 02:20:58.472721', 'step': 2422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:58.504313', 'step': 2422, 'epoch': 2} {'type': 'loss', 'content': 0.012553774751722813, 'timestamp': '2025-09-10 02:20:58.511447', 'step': 2423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:58.541783', 'step': 2423, 'epoch': 2} {'type': 'loss', 'content': 0.005390453618019819, 'timestamp': '2025-09-10 02:20:58.566917', 'step': 2424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:58.602231', 'step': 2424, 'epoch': 2} {'type': 'loss', 'content': 0.02095671556890011, 'timestamp': '2025-09-10 02:20:58.604478', 'step': 2425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:20:58.634822', 'step': 2425, 'epoch': 2} {'type': 'loss', 'content': 0.011055312119424343, 'timestamp': '2025-09-10 02:20:58.642553', 'step': 2426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:20:58.682729', 'step': 2426, 'epoch': 2} {'type': 'loss', 'content': 0.010104840621352196, 'timestamp': '2025-09-10 02:20:58.698595', 'step': 2427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:58.730437', 'step': 2427, 'epoch': 2} {'type': 'loss', 'content': 0.00237080454826355, 'timestamp': '2025-09-10 02:20:58.758616', 'step': 2428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:20:58.790689', 'step': 2428, 'epoch': 2} {'type': 'loss', 'content': 0.009992515668272972, 'timestamp': '2025-09-10 02:20:58.800696', 'step': 2429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:58.833800', 'step': 2429, 'epoch': 2} {'type': 'loss', 'content': 0.029752474278211594, 'timestamp': '2025-09-10 02:20:58.840812', 'step': 2430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:58.875836', 'step': 2430, 'epoch': 2} {'type': 'loss', 'content': 0.0017465923447161913, 'timestamp': '2025-09-10 02:20:58.882412', 'step': 2431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:20:58.913422', 'step': 2431, 'epoch': 2} {'type': 'loss', 'content': 0.02968502603471279, 'timestamp': '2025-09-10 02:20:58.945044', 'step': 2432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:20:58.982449', 'step': 2432, 'epoch': 2} {'type': 'loss', 'content': 0.007202841341495514, 'timestamp': '2025-09-10 02:20:58.997633', 'step': 2433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:59.036354', 'step': 2433, 'epoch': 2} {'type': 'loss', 'content': 0.028496667742729187, 'timestamp': '2025-09-10 02:20:59.043146', 'step': 2434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:20:59.078570', 'step': 2434, 'epoch': 2} {'type': 'loss', 'content': 0.010356120765209198, 'timestamp': '2025-09-10 02:20:59.082482', 'step': 2435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:59.118399', 'step': 2435, 'epoch': 2} {'type': 'loss', 'content': 0.010988089255988598, 'timestamp': '2025-09-10 02:20:59.152646', 'step': 2436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:20:59.189861', 'step': 2436, 'epoch': 2} {'type': 'loss', 'content': 0.013273806311190128, 'timestamp': '2025-09-10 02:20:59.204988', 'step': 2437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:59.240005', 'step': 2437, 'epoch': 2} {'type': 'loss', 'content': 0.0162824559956789, 'timestamp': '2025-09-10 02:20:59.251543', 'step': 2438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:20:59.291319', 'step': 2438, 'epoch': 2} {'type': 'loss', 'content': 0.015538596548140049, 'timestamp': '2025-09-10 02:20:59.302867', 'step': 2439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:20:59.336710', 'step': 2439, 'epoch': 2} {'type': 'loss', 'content': 0.024137185886502266, 'timestamp': '2025-09-10 02:20:59.367489', 'step': 2440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:59.399054', 'step': 2440, 'epoch': 2} {'type': 'loss', 'content': 0.012249810621142387, 'timestamp': '2025-09-10 02:20:59.403560', 'step': 2441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:59.436669', 'step': 2441, 'epoch': 2} {'type': 'loss', 'content': 0.014839425683021545, 'timestamp': '2025-09-10 02:20:59.443398', 'step': 2442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:20:59.481062', 'step': 2442, 'epoch': 2} {'type': 'loss', 'content': 0.024386601522564888, 'timestamp': '2025-09-10 02:20:59.494734', 'step': 2443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:59.526751', 'step': 2443, 'epoch': 2} {'type': 'loss', 'content': 0.007399399299174547, 'timestamp': '2025-09-10 02:20:59.551681', 'step': 2444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:20:59.584018', 'step': 2444, 'epoch': 2} {'type': 'loss', 'content': 0.0049104467034339905, 'timestamp': '2025-09-10 02:20:59.588711', 'step': 2445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 14712978242368}, 'timestamp': '2025-09-10 02:20:59.632271', 'step': 2445, 'epoch': 2} {'type': 'loss', 'content': 0.0064061665907502174, 'timestamp': '2025-09-10 02:20:59.649910', 'step': 2446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:59.683185', 'step': 2446, 'epoch': 2} {'type': 'loss', 'content': 0.026691416278481483, 'timestamp': '2025-09-10 02:20:59.689889', 'step': 2447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:59.724090', 'step': 2447, 'epoch': 2} {'type': 'loss', 'content': 0.004400145262479782, 'timestamp': '2025-09-10 02:20:59.751749', 'step': 2448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:20:59.786408', 'step': 2448, 'epoch': 2} {'type': 'loss', 'content': 0.002782547613605857, 'timestamp': '2025-09-10 02:20:59.798614', 'step': 2449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:20:59.830361', 'step': 2449, 'epoch': 2} {'type': 'loss', 'content': 0.030095087364315987, 'timestamp': '2025-09-10 02:20:59.834411', 'step': 2450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:20:59.867471', 'step': 2450, 'epoch': 2} {'type': 'loss', 'content': 0.004379054065793753, 'timestamp': '2025-09-10 02:20:59.874120', 'step': 2451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:20:59.916375', 'step': 2451, 'epoch': 2} {'type': 'loss', 'content': 0.009237154386937618, 'timestamp': '2025-09-10 02:20:59.954598', 'step': 2452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:20:59.986634', 'step': 2452, 'epoch': 2} {'type': 'loss', 'content': 0.013287164270877838, 'timestamp': '2025-09-10 02:20:59.990682', 'step': 2453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:00.022911', 'step': 2453, 'epoch': 2} {'type': 'loss', 'content': 0.0038054410833865404, 'timestamp': '2025-09-10 02:21:00.030203', 'step': 2454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:00.061572', 'step': 2454, 'epoch': 2} {'type': 'loss', 'content': 0.006211976520717144, 'timestamp': '2025-09-10 02:21:00.065972', 'step': 2455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:00.098272', 'step': 2455, 'epoch': 2} {'type': 'loss', 'content': 0.011952430941164494, 'timestamp': '2025-09-10 02:21:00.123510', 'step': 2456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:00.158649', 'step': 2456, 'epoch': 2} {'type': 'loss', 'content': 0.012498662807047367, 'timestamp': '2025-09-10 02:21:00.163062', 'step': 2457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:21:00.196415', 'step': 2457, 'epoch': 2} {'type': 'loss', 'content': 0.0178080927580595, 'timestamp': '2025-09-10 02:21:00.198975', 'step': 2458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:00.230365', 'step': 2458, 'epoch': 2} {'type': 'loss', 'content': 0.017546426504850388, 'timestamp': '2025-09-10 02:21:00.237228', 'step': 2459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:00.268659', 'step': 2459, 'epoch': 2} {'type': 'loss', 'content': 0.023515610024333, 'timestamp': '2025-09-10 02:21:00.297119', 'step': 2460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:00.329743', 'step': 2460, 'epoch': 2} {'type': 'loss', 'content': 0.0053445808589458466, 'timestamp': '2025-09-10 02:21:00.338117', 'step': 2461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:00.371739', 'step': 2461, 'epoch': 2} {'type': 'loss', 'content': 0.002676423406228423, 'timestamp': '2025-09-10 02:21:00.382019', 'step': 2462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:00.414165', 'step': 2462, 'epoch': 2} {'type': 'loss', 'content': 0.004749422427266836, 'timestamp': '2025-09-10 02:21:00.421359', 'step': 2463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:00.453478', 'step': 2463, 'epoch': 2} {'type': 'loss', 'content': 0.025678569450974464, 'timestamp': '2025-09-10 02:21:00.480924', 'step': 2464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:00.513252', 'step': 2464, 'epoch': 2} {'type': 'loss', 'content': 0.027631347998976707, 'timestamp': '2025-09-10 02:21:00.518313', 'step': 2465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:00.551116', 'step': 2465, 'epoch': 2} {'type': 'loss', 'content': 0.007423713803291321, 'timestamp': '2025-09-10 02:21:00.560967', 'step': 2466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:00.592568', 'step': 2466, 'epoch': 2} {'type': 'loss', 'content': 0.0034356131218373775, 'timestamp': '2025-09-10 02:21:00.599184', 'step': 2467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:00.631300', 'step': 2467, 'epoch': 2} {'type': 'loss', 'content': 0.0028629249427467585, 'timestamp': '2025-09-10 02:21:00.664453', 'step': 2468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:00.697219', 'step': 2468, 'epoch': 2} {'type': 'loss', 'content': 0.003994452767074108, 'timestamp': '2025-09-10 02:21:00.702035', 'step': 2469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:00.733794', 'step': 2469, 'epoch': 2} {'type': 'loss', 'content': 0.007717865519225597, 'timestamp': '2025-09-10 02:21:00.740914', 'step': 2470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:00.773340', 'step': 2470, 'epoch': 2} {'type': 'loss', 'content': 0.008052381686866283, 'timestamp': '2025-09-10 02:21:00.779842', 'step': 2471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:00.811917', 'step': 2471, 'epoch': 2} {'type': 'loss', 'content': 0.023543791845440865, 'timestamp': '2025-09-10 02:21:00.839978', 'step': 2472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:00.872023', 'step': 2472, 'epoch': 2} {'type': 'loss', 'content': 0.004500131588429213, 'timestamp': '2025-09-10 02:21:00.876183', 'step': 2473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:21:00.910614', 'step': 2473, 'epoch': 2} {'type': 'loss', 'content': 0.01701648160815239, 'timestamp': '2025-09-10 02:21:00.923993', 'step': 2474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:00.963743', 'step': 2474, 'epoch': 2} {'type': 'loss', 'content': 0.04145175218582153, 'timestamp': '2025-09-10 02:21:00.970404', 'step': 2475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:01.003069', 'step': 2475, 'epoch': 2} {'type': 'loss', 'content': 0.027746308594942093, 'timestamp': '2025-09-10 02:21:01.036297', 'step': 2476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:21:01.069237', 'step': 2476, 'epoch': 2} {'type': 'loss', 'content': 0.0057420432567596436, 'timestamp': '2025-09-10 02:21:01.081920', 'step': 2477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:01.114764', 'step': 2477, 'epoch': 2} {'type': 'loss', 'content': 0.024375727400183678, 'timestamp': '2025-09-10 02:21:01.126557', 'step': 2478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:01.159373', 'step': 2478, 'epoch': 2} {'type': 'loss', 'content': 0.004905619192868471, 'timestamp': '2025-09-10 02:21:01.169020', 'step': 2479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:01.200895', 'step': 2479, 'epoch': 2} {'type': 'loss', 'content': 0.014483463950455189, 'timestamp': '2025-09-10 02:21:01.228375', 'step': 2480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:01.261928', 'step': 2480, 'epoch': 2} {'type': 'loss', 'content': 0.015179364010691643, 'timestamp': '2025-09-10 02:21:01.269896', 'step': 2481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:21:01.302092', 'step': 2481, 'epoch': 2} {'type': 'loss', 'content': 0.010475664399564266, 'timestamp': '2025-09-10 02:21:01.304553', 'step': 2482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:01.335929', 'step': 2482, 'epoch': 2} {'type': 'loss', 'content': 0.008311014622449875, 'timestamp': '2025-09-10 02:21:01.345565', 'step': 2483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:21:01.383373', 'step': 2483, 'epoch': 2} {'type': 'loss', 'content': 0.018926413729786873, 'timestamp': '2025-09-10 02:21:01.418320', 'step': 2484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:21:01.453949', 'step': 2484, 'epoch': 2} {'type': 'loss', 'content': 0.008039912208914757, 'timestamp': '2025-09-10 02:21:01.457516', 'step': 2485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:01.494246', 'step': 2485, 'epoch': 2} {'type': 'loss', 'content': 0.004062869120389223, 'timestamp': '2025-09-10 02:21:01.506816', 'step': 2486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:21:01.539890', 'step': 2486, 'epoch': 2} {'type': 'loss', 'content': 0.016554275527596474, 'timestamp': '2025-09-10 02:21:01.541912', 'step': 2487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:01.573571', 'step': 2487, 'epoch': 2} {'type': 'loss', 'content': 0.016327768564224243, 'timestamp': '2025-09-10 02:21:01.601646', 'step': 2488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:01.633487', 'step': 2488, 'epoch': 2} {'type': 'loss', 'content': 0.0051765465177595615, 'timestamp': '2025-09-10 02:21:01.635807', 'step': 2489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:01.672950', 'step': 2489, 'epoch': 2} {'type': 'loss', 'content': 0.0068465410731732845, 'timestamp': '2025-09-10 02:21:01.680286', 'step': 2490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:01.713239', 'step': 2490, 'epoch': 2} {'type': 'loss', 'content': 0.01244097389280796, 'timestamp': '2025-09-10 02:21:01.720561', 'step': 2491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:01.758847', 'step': 2491, 'epoch': 2} {'type': 'loss', 'content': 0.0072895921766757965, 'timestamp': '2025-09-10 02:21:01.791378', 'step': 2492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:01.824943', 'step': 2492, 'epoch': 2} {'type': 'loss', 'content': 0.010245480574667454, 'timestamp': '2025-09-10 02:21:01.829063', 'step': 2493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:01.861585', 'step': 2493, 'epoch': 2} {'type': 'loss', 'content': 0.00806692149490118, 'timestamp': '2025-09-10 02:21:01.873213', 'step': 2494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:01.908408', 'step': 2494, 'epoch': 2} {'type': 'loss', 'content': 0.003785413922742009, 'timestamp': '2025-09-10 02:21:01.914785', 'step': 2495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:01.950757', 'step': 2495, 'epoch': 2} {'type': 'loss', 'content': 0.003561714431270957, 'timestamp': '2025-09-10 02:21:01.978198', 'step': 2496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:02.013614', 'step': 2496, 'epoch': 2} {'type': 'loss', 'content': 0.0025676547084003687, 'timestamp': '2025-09-10 02:21:02.018682', 'step': 2497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:21:02.059458', 'step': 2497, 'epoch': 2} {'type': 'loss', 'content': 0.012827993370592594, 'timestamp': '2025-09-10 02:21:02.073181', 'step': 2498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:02.112247', 'step': 2498, 'epoch': 2} {'type': 'loss', 'content': 0.010569563135504723, 'timestamp': '2025-09-10 02:21:02.119588', 'step': 2499, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:21:12.914739', 'step': 2499, 'epoch': 2} {'type': 'pplx', 'content': 18230494.697521377, 'timestamp': '2025-09-10 02:21:12.919042', 'step': 2499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:12.952692', 'step': 2499, 'epoch': 2} {'type': 'loss', 'content': 0.007842292077839375, 'timestamp': '2025-09-10 02:21:12.979378', 'step': 2500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 2500', 'timestamp': '2025-09-10 02:21:18.083563', 'step': 2500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:18.117148', 'step': 2500, 'epoch': 2} {'type': 'loss', 'content': 0.0012375351507216692, 'timestamp': '2025-09-10 02:21:18.121456', 'step': 2501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:18.155236', 'step': 2501, 'epoch': 2} {'type': 'loss', 'content': 0.001796129741705954, 'timestamp': '2025-09-10 02:21:18.164371', 'step': 2502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:18.199139', 'step': 2502, 'epoch': 2} {'type': 'loss', 'content': 0.01929536834359169, 'timestamp': '2025-09-10 02:21:18.205478', 'step': 2503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:18.240242', 'step': 2503, 'epoch': 2} {'type': 'loss', 'content': 0.012172207236289978, 'timestamp': '2025-09-10 02:21:18.270857', 'step': 2504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:18.313891', 'step': 2504, 'epoch': 2} {'type': 'loss', 'content': 0.012182426638901234, 'timestamp': '2025-09-10 02:21:18.319073', 'step': 2505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:18.352236', 'step': 2505, 'epoch': 2} {'type': 'loss', 'content': 0.0019687467720359564, 'timestamp': '2025-09-10 02:21:18.359205', 'step': 2506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:18.392271', 'step': 2506, 'epoch': 2} {'type': 'loss', 'content': 0.0035485646221786737, 'timestamp': '2025-09-10 02:21:18.399195', 'step': 2507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:21:18.434049', 'step': 2507, 'epoch': 2} {'type': 'loss', 'content': 0.017159203067421913, 'timestamp': '2025-09-10 02:21:18.468718', 'step': 2508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:18.505446', 'step': 2508, 'epoch': 2} {'type': 'loss', 'content': 0.00748326163738966, 'timestamp': '2025-09-10 02:21:18.512742', 'step': 2509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:21:18.549559', 'step': 2509, 'epoch': 2} {'type': 'loss', 'content': 0.03193259984254837, 'timestamp': '2025-09-10 02:21:18.551969', 'step': 2510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:18.587099', 'step': 2510, 'epoch': 2} {'type': 'loss', 'content': 0.01799680106341839, 'timestamp': '2025-09-10 02:21:18.593659', 'step': 2511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:18.628387', 'step': 2511, 'epoch': 2} {'type': 'loss', 'content': 0.0041481442749500275, 'timestamp': '2025-09-10 02:21:18.661835', 'step': 2512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:18.693026', 'step': 2512, 'epoch': 2} {'type': 'loss', 'content': 0.011895556934177876, 'timestamp': '2025-09-10 02:21:18.695018', 'step': 2513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:21:18.725339', 'step': 2513, 'epoch': 2} {'type': 'loss', 'content': 0.007325722835958004, 'timestamp': '2025-09-10 02:21:18.727977', 'step': 2514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:18.758656', 'step': 2514, 'epoch': 2} {'type': 'loss', 'content': 0.011184222996234894, 'timestamp': '2025-09-10 02:21:18.770940', 'step': 2515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:18.803371', 'step': 2515, 'epoch': 2} {'type': 'loss', 'content': 0.011311122216284275, 'timestamp': '2025-09-10 02:21:18.836820', 'step': 2516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:18.867500', 'step': 2516, 'epoch': 2} {'type': 'loss', 'content': 0.0030376592185348272, 'timestamp': '2025-09-10 02:21:18.872524', 'step': 2517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:18.903552', 'step': 2517, 'epoch': 2} {'type': 'loss', 'content': 0.013369477353990078, 'timestamp': '2025-09-10 02:21:18.907979', 'step': 2518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:18.938715', 'step': 2518, 'epoch': 2} {'type': 'loss', 'content': 0.0013479441404342651, 'timestamp': '2025-09-10 02:21:18.943289', 'step': 2519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:18.973618', 'step': 2519, 'epoch': 2} {'type': 'loss', 'content': 0.019774915650486946, 'timestamp': '2025-09-10 02:21:19.001111', 'step': 2520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:19.031737', 'step': 2520, 'epoch': 2} {'type': 'loss', 'content': 0.017613651230931282, 'timestamp': '2025-09-10 02:21:19.036334', 'step': 2521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:19.067319', 'step': 2521, 'epoch': 2} {'type': 'loss', 'content': 0.010517450049519539, 'timestamp': '2025-09-10 02:21:19.073952', 'step': 2522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:19.105667', 'step': 2522, 'epoch': 2} {'type': 'loss', 'content': 0.010430269874632359, 'timestamp': '2025-09-10 02:21:19.115558', 'step': 2523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:19.146127', 'step': 2523, 'epoch': 2} {'type': 'loss', 'content': 0.012062592431902885, 'timestamp': '2025-09-10 02:21:19.179159', 'step': 2524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:21:19.212693', 'step': 2524, 'epoch': 2} {'type': 'loss', 'content': 0.0021377981174737215, 'timestamp': '2025-09-10 02:21:19.225778', 'step': 2525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:21:19.259740', 'step': 2525, 'epoch': 2} {'type': 'loss', 'content': 0.014699029736220837, 'timestamp': '2025-09-10 02:21:19.273123', 'step': 2526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:19.303637', 'step': 2526, 'epoch': 2} {'type': 'loss', 'content': 0.00048396483180113137, 'timestamp': '2025-09-10 02:21:19.307765', 'step': 2527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:19.338356', 'step': 2527, 'epoch': 2} {'type': 'loss', 'content': 0.010418041609227657, 'timestamp': '2025-09-10 02:21:19.363583', 'step': 2528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:19.394714', 'step': 2528, 'epoch': 2} {'type': 'loss', 'content': 0.0037663152907043695, 'timestamp': '2025-09-10 02:21:19.405172', 'step': 2529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:21:19.439960', 'step': 2529, 'epoch': 2} {'type': 'loss', 'content': 0.006023730151355267, 'timestamp': '2025-09-10 02:21:19.453673', 'step': 2530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:19.484141', 'step': 2530, 'epoch': 2} {'type': 'loss', 'content': 0.022649195045232773, 'timestamp': '2025-09-10 02:21:19.488584', 'step': 2531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:19.519990', 'step': 2531, 'epoch': 2} {'type': 'loss', 'content': 0.008288032375276089, 'timestamp': '2025-09-10 02:21:19.548032', 'step': 2532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:19.579154', 'step': 2532, 'epoch': 2} {'type': 'loss', 'content': 0.019115403294563293, 'timestamp': '2025-09-10 02:21:19.589494', 'step': 2533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:19.622081', 'step': 2533, 'epoch': 2} {'type': 'loss', 'content': 0.005932506639510393, 'timestamp': '2025-09-10 02:21:19.628835', 'step': 2534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 18035204324480}, 'timestamp': '2025-09-10 02:21:19.680355', 'step': 2534, 'epoch': 2} {'type': 'loss', 'content': 0.013247926719486713, 'timestamp': '2025-09-10 02:21:19.701849', 'step': 2535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:19.732943', 'step': 2535, 'epoch': 2} {'type': 'loss', 'content': 0.0022640167735517025, 'timestamp': '2025-09-10 02:21:19.766423', 'step': 2536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:19.802874', 'step': 2536, 'epoch': 2} {'type': 'loss', 'content': 0.0012684384128078818, 'timestamp': '2025-09-10 02:21:19.805624', 'step': 2537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:19.837258', 'step': 2537, 'epoch': 2} {'type': 'loss', 'content': 0.007246891502290964, 'timestamp': '2025-09-10 02:21:19.841698', 'step': 2538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:21:19.876281', 'step': 2538, 'epoch': 2} {'type': 'loss', 'content': 0.01728993095457554, 'timestamp': '2025-09-10 02:21:19.889930', 'step': 2539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:21:19.929146', 'step': 2539, 'epoch': 2} {'type': 'loss', 'content': 0.011178716085851192, 'timestamp': '2025-09-10 02:21:19.964026', 'step': 2540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:20.000513', 'step': 2540, 'epoch': 2} {'type': 'loss', 'content': 0.008106366731226444, 'timestamp': '2025-09-10 02:21:20.007292', 'step': 2541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:20.043383', 'step': 2541, 'epoch': 2} {'type': 'loss', 'content': 0.02785063162446022, 'timestamp': '2025-09-10 02:21:20.050102', 'step': 2542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:20.084769', 'step': 2542, 'epoch': 2} {'type': 'loss', 'content': 0.025354115292429924, 'timestamp': '2025-09-10 02:21:20.091793', 'step': 2543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:20.129268', 'step': 2543, 'epoch': 2} {'type': 'loss', 'content': 0.0003218255878891796, 'timestamp': '2025-09-10 02:21:20.155204', 'step': 2544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:20.190690', 'step': 2544, 'epoch': 2} {'type': 'loss', 'content': 0.016497811302542686, 'timestamp': '2025-09-10 02:21:20.197651', 'step': 2545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:20.231970', 'step': 2545, 'epoch': 2} {'type': 'loss', 'content': 0.013020535930991173, 'timestamp': '2025-09-10 02:21:20.236272', 'step': 2546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:20.269435', 'step': 2546, 'epoch': 2} {'type': 'loss', 'content': 0.0023302300833165646, 'timestamp': '2025-09-10 02:21:20.276990', 'step': 2547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:20.308936', 'step': 2547, 'epoch': 2} {'type': 'loss', 'content': 0.0026004468090832233, 'timestamp': '2025-09-10 02:21:20.336788', 'step': 2548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:21:20.375176', 'step': 2548, 'epoch': 2} {'type': 'loss', 'content': 0.008805993013083935, 'timestamp': '2025-09-10 02:21:20.388388', 'step': 2549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:20.432408', 'step': 2549, 'epoch': 2} {'type': 'loss', 'content': 0.001498592202551663, 'timestamp': '2025-09-10 02:21:20.440194', 'step': 2550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:20.477923', 'step': 2550, 'epoch': 2} {'type': 'loss', 'content': 0.0017248743679374456, 'timestamp': '2025-09-10 02:21:20.483230', 'step': 2551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:20.528188', 'step': 2551, 'epoch': 2} {'type': 'loss', 'content': 0.006637393496930599, 'timestamp': '2025-09-10 02:21:20.553155', 'step': 2552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:20.585361', 'step': 2552, 'epoch': 2} {'type': 'loss', 'content': 0.045442163944244385, 'timestamp': '2025-09-10 02:21:20.590283', 'step': 2553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:20.622339', 'step': 2553, 'epoch': 2} {'type': 'loss', 'content': 0.018050571903586388, 'timestamp': '2025-09-10 02:21:20.631958', 'step': 2554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:20.662783', 'step': 2554, 'epoch': 2} {'type': 'loss', 'content': 0.0028236510697752237, 'timestamp': '2025-09-10 02:21:20.669499', 'step': 2555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:20.699836', 'step': 2555, 'epoch': 2} {'type': 'loss', 'content': 0.024217301979660988, 'timestamp': '2025-09-10 02:21:20.727494', 'step': 2556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:20.759827', 'step': 2556, 'epoch': 2} {'type': 'loss', 'content': 0.0008744286606088281, 'timestamp': '2025-09-10 02:21:20.768328', 'step': 2557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:20.799748', 'step': 2557, 'epoch': 2} {'type': 'loss', 'content': 0.0017181773437187076, 'timestamp': '2025-09-10 02:21:20.806556', 'step': 2558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:20.837541', 'step': 2558, 'epoch': 2} {'type': 'loss', 'content': 0.0028813164681196213, 'timestamp': '2025-09-10 02:21:20.844912', 'step': 2559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:20.876531', 'step': 2559, 'epoch': 2} {'type': 'loss', 'content': 0.0020341165363788605, 'timestamp': '2025-09-10 02:21:20.904934', 'step': 2560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:20.937659', 'step': 2560, 'epoch': 2} {'type': 'loss', 'content': 0.013223226182162762, 'timestamp': '2025-09-10 02:21:20.943076', 'step': 2561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:20.972726', 'step': 2561, 'epoch': 2} {'type': 'loss', 'content': 0.00022325999452732503, 'timestamp': '2025-09-10 02:21:20.980421', 'step': 2562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:21.010388', 'step': 2562, 'epoch': 2} {'type': 'loss', 'content': 0.0023189482744783163, 'timestamp': '2025-09-10 02:21:21.018006', 'step': 2563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:21.048385', 'step': 2563, 'epoch': 2} {'type': 'loss', 'content': 0.025371316820383072, 'timestamp': '2025-09-10 02:21:21.076165', 'step': 2564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:21.107692', 'step': 2564, 'epoch': 2} {'type': 'loss', 'content': 0.001871153013780713, 'timestamp': '2025-09-10 02:21:21.117495', 'step': 2565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:21.151995', 'step': 2565, 'epoch': 2} {'type': 'loss', 'content': 0.02054077573120594, 'timestamp': '2025-09-10 02:21:21.159853', 'step': 2566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:21.191020', 'step': 2566, 'epoch': 2} {'type': 'loss', 'content': 0.016704251989722252, 'timestamp': '2025-09-10 02:21:21.194881', 'step': 2567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:21.226260', 'step': 2567, 'epoch': 2} {'type': 'loss', 'content': 0.010114437900483608, 'timestamp': '2025-09-10 02:21:21.254592', 'step': 2568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:21.285689', 'step': 2568, 'epoch': 2} {'type': 'loss', 'content': 0.009702653624117374, 'timestamp': '2025-09-10 02:21:21.287904', 'step': 2569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:21.317390', 'step': 2569, 'epoch': 2} {'type': 'loss', 'content': 0.002314184093847871, 'timestamp': '2025-09-10 02:21:21.322059', 'step': 2570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:21.351928', 'step': 2570, 'epoch': 2} {'type': 'loss', 'content': 0.0005179584841243923, 'timestamp': '2025-09-10 02:21:21.355937', 'step': 2571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:21.386239', 'step': 2571, 'epoch': 2} {'type': 'loss', 'content': 0.00541424797847867, 'timestamp': '2025-09-10 02:21:21.414415', 'step': 2572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:21.445599', 'step': 2572, 'epoch': 2} {'type': 'loss', 'content': 0.001489723101258278, 'timestamp': '2025-09-10 02:21:21.449809', 'step': 2573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:21.481471', 'step': 2573, 'epoch': 2} {'type': 'loss', 'content': 0.004314497113227844, 'timestamp': '2025-09-10 02:21:21.485844', 'step': 2574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:21:21.519739', 'step': 2574, 'epoch': 2} {'type': 'loss', 'content': 0.00981599185615778, 'timestamp': '2025-09-10 02:21:21.533143', 'step': 2575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:21.563663', 'step': 2575, 'epoch': 2} {'type': 'loss', 'content': 0.004870929755270481, 'timestamp': '2025-09-10 02:21:21.591404', 'step': 2576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:21.622364', 'step': 2576, 'epoch': 2} {'type': 'loss', 'content': 0.016966963186860085, 'timestamp': '2025-09-10 02:21:21.632658', 'step': 2577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:21:21.667842', 'step': 2577, 'epoch': 2} {'type': 'loss', 'content': 0.00209718756377697, 'timestamp': '2025-09-10 02:21:21.681559', 'step': 2578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:21.712192', 'step': 2578, 'epoch': 2} {'type': 'loss', 'content': 0.006372584495693445, 'timestamp': '2025-09-10 02:21:21.719248', 'step': 2579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:21.750187', 'step': 2579, 'epoch': 2} {'type': 'loss', 'content': 0.0025949012488126755, 'timestamp': '2025-09-10 02:21:21.778358', 'step': 2580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:21.808438', 'step': 2580, 'epoch': 2} {'type': 'loss', 'content': 0.02052624709904194, 'timestamp': '2025-09-10 02:21:21.812988', 'step': 2581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 15662185694400}, 'timestamp': '2025-09-10 02:21:21.858702', 'step': 2581, 'epoch': 2} {'type': 'loss', 'content': 0.00807008147239685, 'timestamp': '2025-09-10 02:21:21.877864', 'step': 2582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:21.909123', 'step': 2582, 'epoch': 2} {'type': 'loss', 'content': 0.018544618040323257, 'timestamp': '2025-09-10 02:21:21.920247', 'step': 2583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:21.951880', 'step': 2583, 'epoch': 2} {'type': 'loss', 'content': 0.005954326130449772, 'timestamp': '2025-09-10 02:21:21.983423', 'step': 2584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:22.015401', 'step': 2584, 'epoch': 2} {'type': 'loss', 'content': 0.010289547964930534, 'timestamp': '2025-09-10 02:21:22.017574', 'step': 2585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:21:22.055532', 'step': 2585, 'epoch': 2} {'type': 'loss', 'content': 0.002948526758700609, 'timestamp': '2025-09-10 02:21:22.068927', 'step': 2586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:22.101655', 'step': 2586, 'epoch': 2} {'type': 'loss', 'content': 0.0275122057646513, 'timestamp': '2025-09-10 02:21:22.114202', 'step': 2587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:21:22.152963', 'step': 2587, 'epoch': 2} {'type': 'loss', 'content': 0.0017236763378605247, 'timestamp': '2025-09-10 02:21:22.189514', 'step': 2588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:22.220119', 'step': 2588, 'epoch': 2} {'type': 'loss', 'content': 0.016498176380991936, 'timestamp': '2025-09-10 02:21:22.227995', 'step': 2589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:22.259287', 'step': 2589, 'epoch': 2} {'type': 'loss', 'content': 0.016592005267739296, 'timestamp': '2025-09-10 02:21:22.271498', 'step': 2590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:22.301955', 'step': 2590, 'epoch': 2} {'type': 'loss', 'content': 0.0010614178609102964, 'timestamp': '2025-09-10 02:21:22.314305', 'step': 2591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:22.346152', 'step': 2591, 'epoch': 2} {'type': 'loss', 'content': 0.009229181334376335, 'timestamp': '2025-09-10 02:21:22.374266', 'step': 2592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:22.404849', 'step': 2592, 'epoch': 2} {'type': 'loss', 'content': 0.001209449372254312, 'timestamp': '2025-09-10 02:21:22.409505', 'step': 2593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:22.441174', 'step': 2593, 'epoch': 2} {'type': 'loss', 'content': 0.010440163314342499, 'timestamp': '2025-09-10 02:21:22.448846', 'step': 2594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:22.480478', 'step': 2594, 'epoch': 2} {'type': 'loss', 'content': 0.0017658992437645793, 'timestamp': '2025-09-10 02:21:22.487049', 'step': 2595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:22.517815', 'step': 2595, 'epoch': 2} {'type': 'loss', 'content': 0.0006559863686561584, 'timestamp': '2025-09-10 02:21:22.546183', 'step': 2596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:21:22.581657', 'step': 2596, 'epoch': 2} {'type': 'loss', 'content': 0.015221442095935345, 'timestamp': '2025-09-10 02:21:22.596787', 'step': 2597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:22.628337', 'step': 2597, 'epoch': 2} {'type': 'loss', 'content': 0.0013818376464769244, 'timestamp': '2025-09-10 02:21:22.636111', 'step': 2598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:22.675626', 'step': 2598, 'epoch': 2} {'type': 'loss', 'content': 0.0057688066735863686, 'timestamp': '2025-09-10 02:21:22.682673', 'step': 2599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:22.713742', 'step': 2599, 'epoch': 2} {'type': 'loss', 'content': 0.0062509505078196526, 'timestamp': '2025-09-10 02:21:22.738675', 'step': 2600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:21:22.772019', 'step': 2600, 'epoch': 2} {'type': 'loss', 'content': 0.01174467708915472, 'timestamp': '2025-09-10 02:21:22.774363', 'step': 2601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:22.806500', 'step': 2601, 'epoch': 2} {'type': 'loss', 'content': 0.026152905076742172, 'timestamp': '2025-09-10 02:21:22.814030', 'step': 2602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:22.845107', 'step': 2602, 'epoch': 2} {'type': 'loss', 'content': 0.0017859925283119082, 'timestamp': '2025-09-10 02:21:22.852090', 'step': 2603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:22.883885', 'step': 2603, 'epoch': 2} {'type': 'loss', 'content': 0.002031368436291814, 'timestamp': '2025-09-10 02:21:22.912569', 'step': 2604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:21:22.949939', 'step': 2604, 'epoch': 2} {'type': 'loss', 'content': 0.004424991551786661, 'timestamp': '2025-09-10 02:21:22.965400', 'step': 2605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:22.999527', 'step': 2605, 'epoch': 2} {'type': 'loss', 'content': 0.032836418598890305, 'timestamp': '2025-09-10 02:21:23.006892', 'step': 2606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:21:23.042573', 'step': 2606, 'epoch': 2} {'type': 'loss', 'content': 0.0030306854750961065, 'timestamp': '2025-09-10 02:21:23.055995', 'step': 2607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:23.087563', 'step': 2607, 'epoch': 2} {'type': 'loss', 'content': 0.0008659110171720386, 'timestamp': '2025-09-10 02:21:23.115941', 'step': 2608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:23.148036', 'step': 2608, 'epoch': 2} {'type': 'loss', 'content': 0.0008872836478985846, 'timestamp': '2025-09-10 02:21:23.158399', 'step': 2609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:23.199773', 'step': 2609, 'epoch': 2} {'type': 'loss', 'content': 0.0002644038759171963, 'timestamp': '2025-09-10 02:21:23.204388', 'step': 2610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:23.242060', 'step': 2610, 'epoch': 2} {'type': 'loss', 'content': 0.008746746927499771, 'timestamp': '2025-09-10 02:21:23.249813', 'step': 2611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:23.284421', 'step': 2611, 'epoch': 2} {'type': 'loss', 'content': 0.0010678042890504003, 'timestamp': '2025-09-10 02:21:23.312325', 'step': 2612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:23.347381', 'step': 2612, 'epoch': 2} {'type': 'loss', 'content': 0.006162055768072605, 'timestamp': '2025-09-10 02:21:23.352904', 'step': 2613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:23.383547', 'step': 2613, 'epoch': 2} {'type': 'loss', 'content': 0.012832626700401306, 'timestamp': '2025-09-10 02:21:23.391370', 'step': 2614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:23.423232', 'step': 2614, 'epoch': 2} {'type': 'loss', 'content': 0.0061977319419384, 'timestamp': '2025-09-10 02:21:23.430799', 'step': 2615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:23.464416', 'step': 2615, 'epoch': 2} {'type': 'loss', 'content': 0.0003420994326006621, 'timestamp': '2025-09-10 02:21:23.489618', 'step': 2616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:23.521959', 'step': 2616, 'epoch': 2} {'type': 'loss', 'content': 0.004029420204460621, 'timestamp': '2025-09-10 02:21:23.528022', 'step': 2617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:21:23.567525', 'step': 2617, 'epoch': 2} {'type': 'loss', 'content': 0.007904608733952045, 'timestamp': '2025-09-10 02:21:23.580897', 'step': 2618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:23.617531', 'step': 2618, 'epoch': 2} {'type': 'loss', 'content': 0.056298431009054184, 'timestamp': '2025-09-10 02:21:23.624513', 'step': 2619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:21:23.657783', 'step': 2619, 'epoch': 2} {'type': 'loss', 'content': 0.04149520769715309, 'timestamp': '2025-09-10 02:21:23.682686', 'step': 2620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:23.714333', 'step': 2620, 'epoch': 2} {'type': 'loss', 'content': 0.013901514001190662, 'timestamp': '2025-09-10 02:21:23.723015', 'step': 2621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:23.754388', 'step': 2621, 'epoch': 2} {'type': 'loss', 'content': 0.005170899443328381, 'timestamp': '2025-09-10 02:21:23.762098', 'step': 2622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:21:23.803781', 'step': 2622, 'epoch': 2} {'type': 'loss', 'content': 0.011854954063892365, 'timestamp': '2025-09-10 02:21:23.817179', 'step': 2623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:23.854925', 'step': 2623, 'epoch': 2} {'type': 'loss', 'content': 0.022033916786313057, 'timestamp': '2025-09-10 02:21:23.883562', 'step': 2624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:21:23.915994', 'step': 2624, 'epoch': 2} {'type': 'loss', 'content': 0.0034682333935052156, 'timestamp': '2025-09-10 02:21:23.920954', 'step': 2625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:23.959395', 'step': 2625, 'epoch': 2} {'type': 'loss', 'content': 0.0019165745470672846, 'timestamp': '2025-09-10 02:21:23.966272', 'step': 2626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:24.001785', 'step': 2626, 'epoch': 2} {'type': 'loss', 'content': 0.0004640703264158219, 'timestamp': '2025-09-10 02:21:24.014321', 'step': 2627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:24.045683', 'step': 2627, 'epoch': 2} {'type': 'loss', 'content': 0.0022274174261838198, 'timestamp': '2025-09-10 02:21:24.073515', 'step': 2628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:24.106052', 'step': 2628, 'epoch': 2} {'type': 'loss', 'content': 0.006261548958718777, 'timestamp': '2025-09-10 02:21:24.111226', 'step': 2629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:24.142328', 'step': 2629, 'epoch': 2} {'type': 'loss', 'content': 0.0015995798166841269, 'timestamp': '2025-09-10 02:21:24.149177', 'step': 2630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:24.181390', 'step': 2630, 'epoch': 2} {'type': 'loss', 'content': 0.002429540967568755, 'timestamp': '2025-09-10 02:21:24.188917', 'step': 2631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:24.219910', 'step': 2631, 'epoch': 2} {'type': 'loss', 'content': 0.00019681244157254696, 'timestamp': '2025-09-10 02:21:24.247913', 'step': 2632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:24.280978', 'step': 2632, 'epoch': 2} {'type': 'loss', 'content': 0.007192966062575579, 'timestamp': '2025-09-10 02:21:24.283097', 'step': 2633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:21:24.319155', 'step': 2633, 'epoch': 2} {'type': 'loss', 'content': 0.007892182096838951, 'timestamp': '2025-09-10 02:21:24.332869', 'step': 2634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:24.365336', 'step': 2634, 'epoch': 2} {'type': 'loss', 'content': 0.0011840269435197115, 'timestamp': '2025-09-10 02:21:24.372246', 'step': 2635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:24.403970', 'step': 2635, 'epoch': 2} {'type': 'loss', 'content': 0.0014801176730543375, 'timestamp': '2025-09-10 02:21:24.432277', 'step': 2636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:24.462772', 'step': 2636, 'epoch': 2} {'type': 'loss', 'content': 0.005055623594671488, 'timestamp': '2025-09-10 02:21:24.468042', 'step': 2637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:24.499747', 'step': 2637, 'epoch': 2} {'type': 'loss', 'content': 0.0026747877709567547, 'timestamp': '2025-09-10 02:21:24.511764', 'step': 2638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:21:24.541779', 'step': 2638, 'epoch': 2} {'type': 'loss', 'content': 0.005939009133726358, 'timestamp': '2025-09-10 02:21:24.544499', 'step': 2639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:24.575720', 'step': 2639, 'epoch': 2} {'type': 'loss', 'content': 0.021643701940774918, 'timestamp': '2025-09-10 02:21:24.603541', 'step': 2640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:24.634852', 'step': 2640, 'epoch': 2} {'type': 'loss', 'content': 0.01125361304730177, 'timestamp': '2025-09-10 02:21:24.639827', 'step': 2641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:21:24.670124', 'step': 2641, 'epoch': 2} {'type': 'loss', 'content': 0.010432683862745762, 'timestamp': '2025-09-10 02:21:24.672783', 'step': 2642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:24.703885', 'step': 2642, 'epoch': 2} {'type': 'loss', 'content': 0.003385532647371292, 'timestamp': '2025-09-10 02:21:24.710428', 'step': 2643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:24.741330', 'step': 2643, 'epoch': 2} {'type': 'loss', 'content': 0.0025549919810146093, 'timestamp': '2025-09-10 02:21:24.769360', 'step': 2644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:24.800848', 'step': 2644, 'epoch': 2} {'type': 'loss', 'content': 0.0029829232953488827, 'timestamp': '2025-09-10 02:21:24.805464', 'step': 2645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:24.841850', 'step': 2645, 'epoch': 2} {'type': 'loss', 'content': 0.0008848052239045501, 'timestamp': '2025-09-10 02:21:24.852115', 'step': 2646, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:21:35.133425', 'step': 2646, 'epoch': 2} {'type': 'pplx', 'content': 18574059.11035138, 'timestamp': '2025-09-10 02:21:35.136168', 'step': 2646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:35.167002', 'step': 2646, 'epoch': 2} {'type': 'loss', 'content': 0.0029080223757773638, 'timestamp': '2025-09-10 02:21:35.172975', 'step': 2647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:35.204002', 'step': 2647, 'epoch': 2} {'type': 'loss', 'content': 0.0013911023270338774, 'timestamp': '2025-09-10 02:21:35.236493', 'step': 2648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:35.268728', 'step': 2648, 'epoch': 2} {'type': 'loss', 'content': 0.001914841472171247, 'timestamp': '2025-09-10 02:21:35.276255', 'step': 2649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:35.309084', 'step': 2649, 'epoch': 2} {'type': 'loss', 'content': 0.017167022451758385, 'timestamp': '2025-09-10 02:21:35.318997', 'step': 2650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:35.350982', 'step': 2650, 'epoch': 2} {'type': 'loss', 'content': 0.003129334654659033, 'timestamp': '2025-09-10 02:21:35.357928', 'step': 2651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:21:35.392583', 'step': 2651, 'epoch': 2} {'type': 'loss', 'content': 0.0010660120751708746, 'timestamp': '2025-09-10 02:21:35.427184', 'step': 2652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:21:35.461172', 'step': 2652, 'epoch': 2} {'type': 'loss', 'content': 0.0017405982362106442, 'timestamp': '2025-09-10 02:21:35.474502', 'step': 2653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:35.505765', 'step': 2653, 'epoch': 2} {'type': 'loss', 'content': 0.0052419803105294704, 'timestamp': '2025-09-10 02:21:35.512620', 'step': 2654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:35.542954', 'step': 2654, 'epoch': 2} {'type': 'loss', 'content': 0.008888996206223965, 'timestamp': '2025-09-10 02:21:35.547239', 'step': 2655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:21:35.585764', 'step': 2655, 'epoch': 2} {'type': 'loss', 'content': 0.0006996404263190925, 'timestamp': '2025-09-10 02:21:35.622348', 'step': 2656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:21:35.653762', 'step': 2656, 'epoch': 2} {'type': 'loss', 'content': 0.013661734759807587, 'timestamp': '2025-09-10 02:21:35.655988', 'step': 2657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:35.686175', 'step': 2657, 'epoch': 2} {'type': 'loss', 'content': 0.007183533161878586, 'timestamp': '2025-09-10 02:21:35.690706', 'step': 2658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:35.723778', 'step': 2658, 'epoch': 2} {'type': 'loss', 'content': 0.059785980731248856, 'timestamp': '2025-09-10 02:21:35.733799', 'step': 2659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:35.765142', 'step': 2659, 'epoch': 2} {'type': 'loss', 'content': 0.0015089567750692368, 'timestamp': '2025-09-10 02:21:35.796304', 'step': 2660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:35.831263', 'step': 2660, 'epoch': 2} {'type': 'loss', 'content': 0.0020707848016172647, 'timestamp': '2025-09-10 02:21:35.835764', 'step': 2661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:35.873478', 'step': 2661, 'epoch': 2} {'type': 'loss', 'content': 0.0033768482971936464, 'timestamp': '2025-09-10 02:21:35.882373', 'step': 2662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:35.921734', 'step': 2662, 'epoch': 2} {'type': 'loss', 'content': 0.00554437842220068, 'timestamp': '2025-09-10 02:21:35.931603', 'step': 2663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:35.970955', 'step': 2663, 'epoch': 2} {'type': 'loss', 'content': 0.008375253528356552, 'timestamp': '2025-09-10 02:21:36.003311', 'step': 2664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:36.049885', 'step': 2664, 'epoch': 2} {'type': 'loss', 'content': 0.003926243167370558, 'timestamp': '2025-09-10 02:21:36.055742', 'step': 2665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:36.101046', 'step': 2665, 'epoch': 2} {'type': 'loss', 'content': 0.0011568154441192746, 'timestamp': '2025-09-10 02:21:36.108919', 'step': 2666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:36.160206', 'step': 2666, 'epoch': 2} {'type': 'loss', 'content': 0.0036792331375181675, 'timestamp': '2025-09-10 02:21:36.167431', 'step': 2667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:36.223847', 'step': 2667, 'epoch': 2} {'type': 'loss', 'content': 0.0026783072389662266, 'timestamp': '2025-09-10 02:21:36.253011', 'step': 2668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:21:36.303802', 'step': 2668, 'epoch': 2} {'type': 'loss', 'content': 0.0006417171680368483, 'timestamp': '2025-09-10 02:21:36.317108', 'step': 2669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:21:36.378980', 'step': 2669, 'epoch': 2} {'type': 'loss', 'content': 0.01134135015308857, 'timestamp': '2025-09-10 02:21:36.392637', 'step': 2670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:36.435220', 'step': 2670, 'epoch': 2} {'type': 'loss', 'content': 0.004530813079327345, 'timestamp': '2025-09-10 02:21:36.446214', 'step': 2671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:36.481707', 'step': 2671, 'epoch': 2} {'type': 'loss', 'content': 0.0019003379857167602, 'timestamp': '2025-09-10 02:21:36.506514', 'step': 2672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:36.537494', 'step': 2672, 'epoch': 2} {'type': 'loss', 'content': 0.009907567873597145, 'timestamp': '2025-09-10 02:21:36.541757', 'step': 2673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:36.575079', 'step': 2673, 'epoch': 2} {'type': 'loss', 'content': 0.0017410024302080274, 'timestamp': '2025-09-10 02:21:36.582406', 'step': 2674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:36.613848', 'step': 2674, 'epoch': 2} {'type': 'loss', 'content': 0.0021735227201133966, 'timestamp': '2025-09-10 02:21:36.624645', 'step': 2675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:36.656011', 'step': 2675, 'epoch': 2} {'type': 'loss', 'content': 0.0014428169233724475, 'timestamp': '2025-09-10 02:21:36.683637', 'step': 2676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:36.718504', 'step': 2676, 'epoch': 2} {'type': 'loss', 'content': 0.00035420857602730393, 'timestamp': '2025-09-10 02:21:36.724320', 'step': 2677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:21:36.764578', 'step': 2677, 'epoch': 2} {'type': 'loss', 'content': 0.004614558536559343, 'timestamp': '2025-09-10 02:21:36.780152', 'step': 2678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:36.818076', 'step': 2678, 'epoch': 2} {'type': 'loss', 'content': 0.006453771609812975, 'timestamp': '2025-09-10 02:21:36.824490', 'step': 2679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:36.872335', 'step': 2679, 'epoch': 2} {'type': 'loss', 'content': 0.034508321434259415, 'timestamp': '2025-09-10 02:21:36.903162', 'step': 2680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:36.960264', 'step': 2680, 'epoch': 2} {'type': 'loss', 'content': 0.011013428680598736, 'timestamp': '2025-09-10 02:21:36.965747', 'step': 2681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:37.008533', 'step': 2681, 'epoch': 2} {'type': 'loss', 'content': 0.015115066431462765, 'timestamp': '2025-09-10 02:21:37.015238', 'step': 2682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:37.055473', 'step': 2682, 'epoch': 2} {'type': 'loss', 'content': 0.00047675202949903905, 'timestamp': '2025-09-10 02:21:37.060861', 'step': 2683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:37.100431', 'step': 2683, 'epoch': 2} {'type': 'loss', 'content': 0.03187503293156624, 'timestamp': '2025-09-10 02:21:37.127892', 'step': 2684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:37.174265', 'step': 2684, 'epoch': 2} {'type': 'loss', 'content': 0.0006218705675564706, 'timestamp': '2025-09-10 02:21:37.182305', 'step': 2685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:37.213798', 'step': 2685, 'epoch': 2} {'type': 'loss', 'content': 0.0013111613225191832, 'timestamp': '2025-09-10 02:21:37.218321', 'step': 2686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:37.248961', 'step': 2686, 'epoch': 2} {'type': 'loss', 'content': 0.007048290688544512, 'timestamp': '2025-09-10 02:21:37.255708', 'step': 2687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:37.287452', 'step': 2687, 'epoch': 2} {'type': 'loss', 'content': 0.0017233153339475393, 'timestamp': '2025-09-10 02:21:37.315254', 'step': 2688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:37.348112', 'step': 2688, 'epoch': 2} {'type': 'loss', 'content': 0.0018436602549627423, 'timestamp': '2025-09-10 02:21:37.355939', 'step': 2689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:37.394439', 'step': 2689, 'epoch': 2} {'type': 'loss', 'content': 0.01839214749634266, 'timestamp': '2025-09-10 02:21:37.402313', 'step': 2690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:37.437257', 'step': 2690, 'epoch': 2} {'type': 'loss', 'content': 0.006622764747589827, 'timestamp': '2025-09-10 02:21:37.444049', 'step': 2691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:37.479721', 'step': 2691, 'epoch': 2} {'type': 'loss', 'content': 0.007068789564073086, 'timestamp': '2025-09-10 02:21:37.507432', 'step': 2692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:37.544360', 'step': 2692, 'epoch': 2} {'type': 'loss', 'content': 0.0019901886116713285, 'timestamp': '2025-09-10 02:21:37.548756', 'step': 2693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:37.586064', 'step': 2693, 'epoch': 2} {'type': 'loss', 'content': 0.0049681165255606174, 'timestamp': '2025-09-10 02:21:37.590610', 'step': 2694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:37.621080', 'step': 2694, 'epoch': 2} {'type': 'loss', 'content': 0.0012007238110527396, 'timestamp': '2025-09-10 02:21:37.625231', 'step': 2695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:37.655813', 'step': 2695, 'epoch': 2} {'type': 'loss', 'content': 0.008289673365652561, 'timestamp': '2025-09-10 02:21:37.681152', 'step': 2696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:37.712672', 'step': 2696, 'epoch': 2} {'type': 'loss', 'content': 0.0011843375395983458, 'timestamp': '2025-09-10 02:21:37.714804', 'step': 2697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:37.746400', 'step': 2697, 'epoch': 2} {'type': 'loss', 'content': 0.0065173497423529625, 'timestamp': '2025-09-10 02:21:37.756744', 'step': 2698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:21:37.787428', 'step': 2698, 'epoch': 2} {'type': 'loss', 'content': 0.00036999728763476014, 'timestamp': '2025-09-10 02:21:37.790044', 'step': 2699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:37.827483', 'step': 2699, 'epoch': 2} {'type': 'loss', 'content': 0.01047492679208517, 'timestamp': '2025-09-10 02:21:37.855289', 'step': 2700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:37.908537', 'step': 2700, 'epoch': 2} {'type': 'loss', 'content': 0.0033648067619651556, 'timestamp': '2025-09-10 02:21:37.923693', 'step': 2701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:38.005482', 'step': 2701, 'epoch': 2} {'type': 'loss', 'content': 0.001408770913258195, 'timestamp': '2025-09-10 02:21:38.022992', 'step': 2702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:38.106573', 'step': 2702, 'epoch': 2} {'type': 'loss', 'content': 0.011482964269816875, 'timestamp': '2025-09-10 02:21:38.117121', 'step': 2703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:38.165908', 'step': 2703, 'epoch': 2} {'type': 'loss', 'content': 0.0008962144493125379, 'timestamp': '2025-09-10 02:21:38.204532', 'step': 2704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:38.289760', 'step': 2704, 'epoch': 2} {'type': 'loss', 'content': 0.0022653231862932444, 'timestamp': '2025-09-10 02:21:38.294943', 'step': 2705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:38.365940', 'step': 2705, 'epoch': 2} {'type': 'loss', 'content': 0.013882993720471859, 'timestamp': '2025-09-10 02:21:38.383012', 'step': 2706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:38.457044', 'step': 2706, 'epoch': 2} {'type': 'loss', 'content': 0.001253266236744821, 'timestamp': '2025-09-10 02:21:38.464033', 'step': 2707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:38.543788', 'step': 2707, 'epoch': 2} {'type': 'loss', 'content': 0.0013703681761398911, 'timestamp': '2025-09-10 02:21:38.575094', 'step': 2708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:38.622236', 'step': 2708, 'epoch': 2} {'type': 'loss', 'content': 0.0033048451878130436, 'timestamp': '2025-09-10 02:21:38.630211', 'step': 2709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:38.675604', 'step': 2709, 'epoch': 2} {'type': 'loss', 'content': 0.0006388475303538144, 'timestamp': '2025-09-10 02:21:38.682726', 'step': 2710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:38.721884', 'step': 2710, 'epoch': 2} {'type': 'loss', 'content': 0.018123749643564224, 'timestamp': '2025-09-10 02:21:38.729692', 'step': 2711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:38.783696', 'step': 2711, 'epoch': 2} {'type': 'loss', 'content': 0.003044202458113432, 'timestamp': '2025-09-10 02:21:38.816789', 'step': 2712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:21:38.862713', 'step': 2712, 'epoch': 2} {'type': 'loss', 'content': 0.0013637479860335588, 'timestamp': '2025-09-10 02:21:38.875394', 'step': 2713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:38.929319', 'step': 2713, 'epoch': 2} {'type': 'loss', 'content': 0.0018133390694856644, 'timestamp': '2025-09-10 02:21:38.940358', 'step': 2714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 19459015502528}, 'timestamp': '2025-09-10 02:21:39.009625', 'step': 2714, 'epoch': 2} {'type': 'loss', 'content': 0.01170498225837946, 'timestamp': '2025-09-10 02:21:39.033076', 'step': 2715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:39.075710', 'step': 2715, 'epoch': 2} {'type': 'loss', 'content': 0.0006123408675193787, 'timestamp': '2025-09-10 02:21:39.103674', 'step': 2716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:39.153884', 'step': 2716, 'epoch': 2} {'type': 'loss', 'content': 0.0004910778952762485, 'timestamp': '2025-09-10 02:21:39.159177', 'step': 2717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:39.204546', 'step': 2717, 'epoch': 2} {'type': 'loss', 'content': 0.0005400904337875545, 'timestamp': '2025-09-10 02:21:39.212357', 'step': 2718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:39.265552', 'step': 2718, 'epoch': 2} {'type': 'loss', 'content': 0.02437109872698784, 'timestamp': '2025-09-10 02:21:39.272686', 'step': 2719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:39.311229', 'step': 2719, 'epoch': 2} {'type': 'loss', 'content': 0.003212881973013282, 'timestamp': '2025-09-10 02:21:39.339010', 'step': 2720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:39.389143', 'step': 2720, 'epoch': 2} {'type': 'loss', 'content': 0.004050101153552532, 'timestamp': '2025-09-10 02:21:39.399713', 'step': 2721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:39.441377', 'step': 2721, 'epoch': 2} {'type': 'loss', 'content': 0.0005341669311746955, 'timestamp': '2025-09-10 02:21:39.448999', 'step': 2722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:39.488414', 'step': 2722, 'epoch': 2} {'type': 'loss', 'content': 0.0020567751489579678, 'timestamp': '2025-09-10 02:21:39.496393', 'step': 2723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:39.534121', 'step': 2723, 'epoch': 2} {'type': 'loss', 'content': 0.0017389410641044378, 'timestamp': '2025-09-10 02:21:39.562692', 'step': 2724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:39.640851', 'step': 2724, 'epoch': 2} {'type': 'loss', 'content': 0.047998156398534775, 'timestamp': '2025-09-10 02:21:39.658482', 'step': 2725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:39.726652', 'step': 2725, 'epoch': 2} {'type': 'loss', 'content': 0.0006225144607014954, 'timestamp': '2025-09-10 02:21:39.743044', 'step': 2726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:39.810940', 'step': 2726, 'epoch': 2} {'type': 'loss', 'content': 0.0033620852045714855, 'timestamp': '2025-09-10 02:21:39.827193', 'step': 2727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:39.890724', 'step': 2727, 'epoch': 2} {'type': 'loss', 'content': 0.0009153550490736961, 'timestamp': '2025-09-10 02:21:39.915738', 'step': 2728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:39.965315', 'step': 2728, 'epoch': 2} {'type': 'loss', 'content': 0.0016450297553092241, 'timestamp': '2025-09-10 02:21:39.973894', 'step': 2729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:40.010673', 'step': 2729, 'epoch': 2} {'type': 'loss', 'content': 0.00031730628688819706, 'timestamp': '2025-09-10 02:21:40.017831', 'step': 2730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:40.051540', 'step': 2730, 'epoch': 2} {'type': 'loss', 'content': 0.05042002350091934, 'timestamp': '2025-09-10 02:21:40.059029', 'step': 2731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:40.097441', 'step': 2731, 'epoch': 2} {'type': 'loss', 'content': 0.0009090257226489484, 'timestamp': '2025-09-10 02:21:40.125225', 'step': 2732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:40.168073', 'step': 2732, 'epoch': 2} {'type': 'loss', 'content': 0.011402477510273457, 'timestamp': '2025-09-10 02:21:40.178050', 'step': 2733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 15187581968384}, 'timestamp': '2025-09-10 02:21:40.236822', 'step': 2733, 'epoch': 2} {'type': 'loss', 'content': 0.006502915173768997, 'timestamp': '2025-09-10 02:21:40.254504', 'step': 2734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:40.294820', 'step': 2734, 'epoch': 2} {'type': 'loss', 'content': 0.04736243933439255, 'timestamp': '2025-09-10 02:21:40.301645', 'step': 2735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:40.339970', 'step': 2735, 'epoch': 2} {'type': 'loss', 'content': 0.0024216361343860626, 'timestamp': '2025-09-10 02:21:40.367094', 'step': 2736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:40.400543', 'step': 2736, 'epoch': 2} {'type': 'loss', 'content': 0.004260449670255184, 'timestamp': '2025-09-10 02:21:40.404660', 'step': 2737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:40.436564', 'step': 2737, 'epoch': 2} {'type': 'loss', 'content': 0.0016414711717516184, 'timestamp': '2025-09-10 02:21:40.442927', 'step': 2738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:40.473988', 'step': 2738, 'epoch': 2} {'type': 'loss', 'content': 0.0007454793085344136, 'timestamp': '2025-09-10 02:21:40.480752', 'step': 2739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:40.511868', 'step': 2739, 'epoch': 2} {'type': 'loss', 'content': 0.025238368660211563, 'timestamp': '2025-09-10 02:21:40.539369', 'step': 2740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:40.573420', 'step': 2740, 'epoch': 2} {'type': 'loss', 'content': 0.0014565506717190146, 'timestamp': '2025-09-10 02:21:40.581040', 'step': 2741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:21:40.614852', 'step': 2741, 'epoch': 2} {'type': 'loss', 'content': 0.0028659238014370203, 'timestamp': '2025-09-10 02:21:40.628243', 'step': 2742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:40.659481', 'step': 2742, 'epoch': 2} {'type': 'loss', 'content': 0.005315977614372969, 'timestamp': '2025-09-10 02:21:40.666068', 'step': 2743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:40.697352', 'step': 2743, 'epoch': 2} {'type': 'loss', 'content': 0.0011978724505752325, 'timestamp': '2025-09-10 02:21:40.725029', 'step': 2744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:21:40.757583', 'step': 2744, 'epoch': 2} {'type': 'loss', 'content': 0.0008306491072289646, 'timestamp': '2025-09-10 02:21:40.770565', 'step': 2745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:40.801906', 'step': 2745, 'epoch': 2} {'type': 'loss', 'content': 0.002079846104606986, 'timestamp': '2025-09-10 02:21:40.808974', 'step': 2746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:40.840990', 'step': 2746, 'epoch': 2} {'type': 'loss', 'content': 0.013913876377046108, 'timestamp': '2025-09-10 02:21:40.845922', 'step': 2747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:40.884272', 'step': 2747, 'epoch': 2} {'type': 'loss', 'content': 0.0014017752837389708, 'timestamp': '2025-09-10 02:21:40.915824', 'step': 2748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:40.952577', 'step': 2748, 'epoch': 2} {'type': 'loss', 'content': 0.018380844965577126, 'timestamp': '2025-09-10 02:21:40.957550', 'step': 2749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:40.991539', 'step': 2749, 'epoch': 2} {'type': 'loss', 'content': 0.04001007229089737, 'timestamp': '2025-09-10 02:21:41.001708', 'step': 2750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:41.039352', 'step': 2750, 'epoch': 2} {'type': 'loss', 'content': 0.0020031663589179516, 'timestamp': '2025-09-10 02:21:41.049458', 'step': 2751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:41.089957', 'step': 2751, 'epoch': 2} {'type': 'loss', 'content': 0.0007604836719110608, 'timestamp': '2025-09-10 02:21:41.118330', 'step': 2752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:41.155487', 'step': 2752, 'epoch': 2} {'type': 'loss', 'content': 0.0003465786576271057, 'timestamp': '2025-09-10 02:21:41.165491', 'step': 2753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:41.202972', 'step': 2753, 'epoch': 2} {'type': 'loss', 'content': 0.0002132646186510101, 'timestamp': '2025-09-10 02:21:41.209606', 'step': 2754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:41.248213', 'step': 2754, 'epoch': 2} {'type': 'loss', 'content': 0.0009989741956815124, 'timestamp': '2025-09-10 02:21:41.255377', 'step': 2755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:41.297555', 'step': 2755, 'epoch': 2} {'type': 'loss', 'content': 0.006549767684191465, 'timestamp': '2025-09-10 02:21:41.325684', 'step': 2756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:41.364478', 'step': 2756, 'epoch': 2} {'type': 'loss', 'content': 0.001987830735743046, 'timestamp': '2025-09-10 02:21:41.366592', 'step': 2757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:41.402406', 'step': 2757, 'epoch': 2} {'type': 'loss', 'content': 0.006204267032444477, 'timestamp': '2025-09-10 02:21:41.408956', 'step': 2758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:41.446821', 'step': 2758, 'epoch': 2} {'type': 'loss', 'content': 0.01345762424170971, 'timestamp': '2025-09-10 02:21:41.456689', 'step': 2759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:41.491228', 'step': 2759, 'epoch': 2} {'type': 'loss', 'content': 0.002160031348466873, 'timestamp': '2025-09-10 02:21:41.524617', 'step': 2760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:41.555466', 'step': 2760, 'epoch': 2} {'type': 'loss', 'content': 0.0004899486084468663, 'timestamp': '2025-09-10 02:21:41.557921', 'step': 2761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:41.588416', 'step': 2761, 'epoch': 2} {'type': 'loss', 'content': 0.002803497016429901, 'timestamp': '2025-09-10 02:21:41.595467', 'step': 2762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:41.625790', 'step': 2762, 'epoch': 2} {'type': 'loss', 'content': 0.0015402629505842924, 'timestamp': '2025-09-10 02:21:41.632695', 'step': 2763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:41.663176', 'step': 2763, 'epoch': 2} {'type': 'loss', 'content': 0.005135064013302326, 'timestamp': '2025-09-10 02:21:41.691459', 'step': 2764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:41.722179', 'step': 2764, 'epoch': 2} {'type': 'loss', 'content': 0.0008388920687139034, 'timestamp': '2025-09-10 02:21:41.731503', 'step': 2765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:41.763229', 'step': 2765, 'epoch': 2} {'type': 'loss', 'content': 0.001156167476437986, 'timestamp': '2025-09-10 02:21:41.775466', 'step': 2766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:41.808417', 'step': 2766, 'epoch': 2} {'type': 'loss', 'content': 0.003134796628728509, 'timestamp': '2025-09-10 02:21:41.815705', 'step': 2767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:41.847048', 'step': 2767, 'epoch': 2} {'type': 'loss', 'content': 0.03722445294260979, 'timestamp': '2025-09-10 02:21:41.874747', 'step': 2768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:41.906579', 'step': 2768, 'epoch': 2} {'type': 'loss', 'content': 0.0030981996096670628, 'timestamp': '2025-09-10 02:21:41.910934', 'step': 2769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:41.941649', 'step': 2769, 'epoch': 2} {'type': 'loss', 'content': 0.026767630130052567, 'timestamp': '2025-09-10 02:21:41.945986', 'step': 2770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:41.977046', 'step': 2770, 'epoch': 2} {'type': 'loss', 'content': 0.004015625920146704, 'timestamp': '2025-09-10 02:21:41.983688', 'step': 2771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:42.014899', 'step': 2771, 'epoch': 2} {'type': 'loss', 'content': 0.01961735263466835, 'timestamp': '2025-09-10 02:21:42.042822', 'step': 2772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:42.074168', 'step': 2772, 'epoch': 2} {'type': 'loss', 'content': 0.004195111338049173, 'timestamp': '2025-09-10 02:21:42.081567', 'step': 2773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:42.113471', 'step': 2773, 'epoch': 2} {'type': 'loss', 'content': 0.00503236660733819, 'timestamp': '2025-09-10 02:21:42.123489', 'step': 2774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:42.155417', 'step': 2774, 'epoch': 2} {'type': 'loss', 'content': 0.00502787483856082, 'timestamp': '2025-09-10 02:21:42.162981', 'step': 2775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:42.194347', 'step': 2775, 'epoch': 2} {'type': 'loss', 'content': 0.024155091494321823, 'timestamp': '2025-09-10 02:21:42.225406', 'step': 2776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:42.256407', 'step': 2776, 'epoch': 2} {'type': 'loss', 'content': 0.02515154518187046, 'timestamp': '2025-09-10 02:21:42.264155', 'step': 2777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:42.295541', 'step': 2777, 'epoch': 2} {'type': 'loss', 'content': 0.018512414768338203, 'timestamp': '2025-09-10 02:21:42.306224', 'step': 2778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:42.338151', 'step': 2778, 'epoch': 2} {'type': 'loss', 'content': 0.006325080059468746, 'timestamp': '2025-09-10 02:21:42.349127', 'step': 2779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:42.385992', 'step': 2779, 'epoch': 2} {'type': 'loss', 'content': 0.0009645558893680573, 'timestamp': '2025-09-10 02:21:42.417123', 'step': 2780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:42.447826', 'step': 2780, 'epoch': 2} {'type': 'loss', 'content': 0.004604689311236143, 'timestamp': '2025-09-10 02:21:42.453228', 'step': 2781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:42.483632', 'step': 2781, 'epoch': 2} {'type': 'loss', 'content': 0.004039444029331207, 'timestamp': '2025-09-10 02:21:42.490568', 'step': 2782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:42.520587', 'step': 2782, 'epoch': 2} {'type': 'loss', 'content': 0.0015288168797269464, 'timestamp': '2025-09-10 02:21:42.528202', 'step': 2783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:42.559405', 'step': 2783, 'epoch': 2} {'type': 'loss', 'content': 0.004764964338392019, 'timestamp': '2025-09-10 02:21:42.584432', 'step': 2784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:42.615196', 'step': 2784, 'epoch': 2} {'type': 'loss', 'content': 0.001542671350762248, 'timestamp': '2025-09-10 02:21:42.623018', 'step': 2785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:42.654012', 'step': 2785, 'epoch': 2} {'type': 'loss', 'content': 0.000828076503239572, 'timestamp': '2025-09-10 02:21:42.665094', 'step': 2786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:42.696485', 'step': 2786, 'epoch': 2} {'type': 'loss', 'content': 0.0006047695060260594, 'timestamp': '2025-09-10 02:21:42.706673', 'step': 2787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:42.737739', 'step': 2787, 'epoch': 2} {'type': 'loss', 'content': 0.002101297490298748, 'timestamp': '2025-09-10 02:21:42.770979', 'step': 2788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:21:42.803580', 'step': 2788, 'epoch': 2} {'type': 'loss', 'content': 0.0025870150420814753, 'timestamp': '2025-09-10 02:21:42.806965', 'step': 2789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:42.841421', 'step': 2789, 'epoch': 2} {'type': 'loss', 'content': 0.028584027662873268, 'timestamp': '2025-09-10 02:21:42.848544', 'step': 2790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:42.887114', 'step': 2790, 'epoch': 2} {'type': 'loss', 'content': 0.035170648247003555, 'timestamp': '2025-09-10 02:21:42.893869', 'step': 2791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:42.929384', 'step': 2791, 'epoch': 2} {'type': 'loss', 'content': 0.0014619800494983792, 'timestamp': '2025-09-10 02:21:42.957717', 'step': 2792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:42.995938', 'step': 2792, 'epoch': 2} {'type': 'loss', 'content': 0.01359619665890932, 'timestamp': '2025-09-10 02:21:43.001446', 'step': 2793, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:21:53.465801', 'step': 2793, 'epoch': 2} {'type': 'pplx', 'content': 19197019.4612857, 'timestamp': '2025-09-10 02:21:53.469410', 'step': 2793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:53.499938', 'step': 2793, 'epoch': 2} {'type': 'loss', 'content': 0.000690083543304354, 'timestamp': '2025-09-10 02:21:53.510007', 'step': 2794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:21:53.544004', 'step': 2794, 'epoch': 2} {'type': 'loss', 'content': 0.056752197444438934, 'timestamp': '2025-09-10 02:21:53.557315', 'step': 2795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:53.588933', 'step': 2795, 'epoch': 2} {'type': 'loss', 'content': 0.0039436123333871365, 'timestamp': '2025-09-10 02:21:53.616773', 'step': 2796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:53.647612', 'step': 2796, 'epoch': 2} {'type': 'loss', 'content': 0.003669754136353731, 'timestamp': '2025-09-10 02:21:53.653063', 'step': 2797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:53.684749', 'step': 2797, 'epoch': 2} {'type': 'loss', 'content': 0.0009105192148126662, 'timestamp': '2025-09-10 02:21:53.692063', 'step': 2798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:53.723340', 'step': 2798, 'epoch': 2} {'type': 'loss', 'content': 0.012229084968566895, 'timestamp': '2025-09-10 02:21:53.727670', 'step': 2799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:53.757906', 'step': 2799, 'epoch': 2} {'type': 'loss', 'content': 0.0010326796909794211, 'timestamp': '2025-09-10 02:21:53.783340', 'step': 2800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:53.814232', 'step': 2800, 'epoch': 2} {'type': 'loss', 'content': 0.015513862483203411, 'timestamp': '2025-09-10 02:21:53.816480', 'step': 2801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:21:53.851680', 'step': 2801, 'epoch': 2} {'type': 'loss', 'content': 0.022338945418596268, 'timestamp': '2025-09-10 02:21:53.865695', 'step': 2802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:53.898345', 'step': 2802, 'epoch': 2} {'type': 'loss', 'content': 0.012829114682972431, 'timestamp': '2025-09-10 02:21:53.905512', 'step': 2803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:53.935544', 'step': 2803, 'epoch': 2} {'type': 'loss', 'content': 0.004405899439007044, 'timestamp': '2025-09-10 02:21:53.961305', 'step': 2804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:54.000565', 'step': 2804, 'epoch': 2} {'type': 'loss', 'content': 0.0013872667914256454, 'timestamp': '2025-09-10 02:21:54.011093', 'step': 2805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:54.046640', 'step': 2805, 'epoch': 2} {'type': 'loss', 'content': 0.005773500073701143, 'timestamp': '2025-09-10 02:21:54.057600', 'step': 2806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:54.092646', 'step': 2806, 'epoch': 2} {'type': 'loss', 'content': 0.010280570015311241, 'timestamp': '2025-09-10 02:21:54.099713', 'step': 2807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:54.134369', 'step': 2807, 'epoch': 2} {'type': 'loss', 'content': 0.03476468473672867, 'timestamp': '2025-09-10 02:21:54.162308', 'step': 2808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:54.196654', 'step': 2808, 'epoch': 2} {'type': 'loss', 'content': 0.0031658527441322803, 'timestamp': '2025-09-10 02:21:54.198954', 'step': 2809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:54.232450', 'step': 2809, 'epoch': 2} {'type': 'loss', 'content': 0.006831489037722349, 'timestamp': '2025-09-10 02:21:54.237018', 'step': 2810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:54.278093', 'step': 2810, 'epoch': 2} {'type': 'loss', 'content': 0.0033258756157010794, 'timestamp': '2025-09-10 02:21:54.290608', 'step': 2811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:54.328729', 'step': 2811, 'epoch': 2} {'type': 'loss', 'content': 0.024213241413235664, 'timestamp': '2025-09-10 02:21:54.359819', 'step': 2812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:54.393296', 'step': 2812, 'epoch': 2} {'type': 'loss', 'content': 0.003739068517461419, 'timestamp': '2025-09-10 02:21:54.398570', 'step': 2813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:54.449409', 'step': 2813, 'epoch': 2} {'type': 'loss', 'content': 0.0015904037281870842, 'timestamp': '2025-09-10 02:21:54.457159', 'step': 2814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:54.494234', 'step': 2814, 'epoch': 2} {'type': 'loss', 'content': 0.0012923607137054205, 'timestamp': '2025-09-10 02:21:54.498995', 'step': 2815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:54.535980', 'step': 2815, 'epoch': 2} {'type': 'loss', 'content': 0.0011023187544196844, 'timestamp': '2025-09-10 02:21:54.564641', 'step': 2816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:54.599843', 'step': 2816, 'epoch': 2} {'type': 'loss', 'content': 0.005850085057318211, 'timestamp': '2025-09-10 02:21:54.604642', 'step': 2817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:54.642705', 'step': 2817, 'epoch': 2} {'type': 'loss', 'content': 0.006922147236764431, 'timestamp': '2025-09-10 02:21:54.647104', 'step': 2818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:54.682379', 'step': 2818, 'epoch': 2} {'type': 'loss', 'content': 0.006881711073219776, 'timestamp': '2025-09-10 02:21:54.688316', 'step': 2819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:21:54.724767', 'step': 2819, 'epoch': 2} {'type': 'loss', 'content': 0.002905226079747081, 'timestamp': '2025-09-10 02:21:54.759067', 'step': 2820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:54.796273', 'step': 2820, 'epoch': 2} {'type': 'loss', 'content': 0.042592164129018784, 'timestamp': '2025-09-10 02:21:54.804964', 'step': 2821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:54.847161', 'step': 2821, 'epoch': 2} {'type': 'loss', 'content': 0.0068238540552556515, 'timestamp': '2025-09-10 02:21:54.853508', 'step': 2822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:54.891239', 'step': 2822, 'epoch': 2} {'type': 'loss', 'content': 0.02196827158331871, 'timestamp': '2025-09-10 02:21:54.898080', 'step': 2823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:54.934244', 'step': 2823, 'epoch': 2} {'type': 'loss', 'content': 0.007932315580546856, 'timestamp': '2025-09-10 02:21:54.962639', 'step': 2824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:54.994981', 'step': 2824, 'epoch': 2} {'type': 'loss', 'content': 0.009902574121952057, 'timestamp': '2025-09-10 02:21:55.003516', 'step': 2825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:55.035712', 'step': 2825, 'epoch': 2} {'type': 'loss', 'content': 0.0023097884841263294, 'timestamp': '2025-09-10 02:21:55.046003', 'step': 2826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:55.076515', 'step': 2826, 'epoch': 2} {'type': 'loss', 'content': 0.010782705619931221, 'timestamp': '2025-09-10 02:21:55.083561', 'step': 2827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:55.113817', 'step': 2827, 'epoch': 2} {'type': 'loss', 'content': 0.004876940976828337, 'timestamp': '2025-09-10 02:21:55.145150', 'step': 2828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:21:55.174739', 'step': 2828, 'epoch': 2} {'type': 'loss', 'content': 0.0016192414332181215, 'timestamp': '2025-09-10 02:21:55.176954', 'step': 2829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:55.207595', 'step': 2829, 'epoch': 2} {'type': 'loss', 'content': 0.013937892392277718, 'timestamp': '2025-09-10 02:21:55.214827', 'step': 2830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:55.245059', 'step': 2830, 'epoch': 2} {'type': 'loss', 'content': 0.003963314928114414, 'timestamp': '2025-09-10 02:21:55.252530', 'step': 2831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:55.283769', 'step': 2831, 'epoch': 2} {'type': 'loss', 'content': 0.018773654475808144, 'timestamp': '2025-09-10 02:21:55.308637', 'step': 2832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:55.340079', 'step': 2832, 'epoch': 2} {'type': 'loss', 'content': 0.017363855615258217, 'timestamp': '2025-09-10 02:21:55.344371', 'step': 2833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:55.374762', 'step': 2833, 'epoch': 2} {'type': 'loss', 'content': 0.003922105301171541, 'timestamp': '2025-09-10 02:21:55.381507', 'step': 2834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:55.412237', 'step': 2834, 'epoch': 2} {'type': 'loss', 'content': 0.01441988069564104, 'timestamp': '2025-09-10 02:21:55.419228', 'step': 2835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:55.456602', 'step': 2835, 'epoch': 2} {'type': 'loss', 'content': 0.016393983736634254, 'timestamp': '2025-09-10 02:21:55.487947', 'step': 2836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:55.520112', 'step': 2836, 'epoch': 2} {'type': 'loss', 'content': 0.0013055962044745684, 'timestamp': '2025-09-10 02:21:55.522406', 'step': 2837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:55.553310', 'step': 2837, 'epoch': 2} {'type': 'loss', 'content': 0.002322630723938346, 'timestamp': '2025-09-10 02:21:55.560592', 'step': 2838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:55.591278', 'step': 2838, 'epoch': 2} {'type': 'loss', 'content': 0.0012900998117402196, 'timestamp': '2025-09-10 02:21:55.599034', 'step': 2839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:55.631620', 'step': 2839, 'epoch': 2} {'type': 'loss', 'content': 0.004327766597270966, 'timestamp': '2025-09-10 02:21:55.659512', 'step': 2840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:55.692246', 'step': 2840, 'epoch': 2} {'type': 'loss', 'content': 0.0029336088337004185, 'timestamp': '2025-09-10 02:21:55.699962', 'step': 2841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:55.731399', 'step': 2841, 'epoch': 2} {'type': 'loss', 'content': 0.008453912101686, 'timestamp': '2025-09-10 02:21:55.735812', 'step': 2842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:21:55.774425', 'step': 2842, 'epoch': 2} {'type': 'loss', 'content': 0.00400108378380537, 'timestamp': '2025-09-10 02:21:55.790113', 'step': 2843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:55.819970', 'step': 2843, 'epoch': 2} {'type': 'loss', 'content': 0.001131609664298594, 'timestamp': '2025-09-10 02:21:55.847859', 'step': 2844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:55.883692', 'step': 2844, 'epoch': 2} {'type': 'loss', 'content': 0.04177376627922058, 'timestamp': '2025-09-10 02:21:55.887997', 'step': 2845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:55.926164', 'step': 2845, 'epoch': 2} {'type': 'loss', 'content': 0.013375887647271156, 'timestamp': '2025-09-10 02:21:55.934060', 'step': 2846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:21:55.979412', 'step': 2846, 'epoch': 2} {'type': 'loss', 'content': 0.034866467118263245, 'timestamp': '2025-09-10 02:21:55.995621', 'step': 2847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:56.040186', 'step': 2847, 'epoch': 2} {'type': 'loss', 'content': 0.008052507415413857, 'timestamp': '2025-09-10 02:21:56.065565', 'step': 2848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:21:56.106829', 'step': 2848, 'epoch': 2} {'type': 'loss', 'content': 0.0027601835317909718, 'timestamp': '2025-09-10 02:21:56.119851', 'step': 2849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:56.159735', 'step': 2849, 'epoch': 2} {'type': 'loss', 'content': 0.0046163699589669704, 'timestamp': '2025-09-10 02:21:56.167667', 'step': 2850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:56.204477', 'step': 2850, 'epoch': 2} {'type': 'loss', 'content': 0.0024019996635615826, 'timestamp': '2025-09-10 02:21:56.208674', 'step': 2851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:56.250504', 'step': 2851, 'epoch': 2} {'type': 'loss', 'content': 0.00619547301903367, 'timestamp': '2025-09-10 02:21:56.278254', 'step': 2852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:21:56.309613', 'step': 2852, 'epoch': 2} {'type': 'loss', 'content': 0.011628863401710987, 'timestamp': '2025-09-10 02:21:56.311783', 'step': 2853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:21:56.341612', 'step': 2853, 'epoch': 2} {'type': 'loss', 'content': 0.011419777758419514, 'timestamp': '2025-09-10 02:21:56.343838', 'step': 2854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:21:56.382442', 'step': 2854, 'epoch': 2} {'type': 'loss', 'content': 0.012729802168905735, 'timestamp': '2025-09-10 02:21:56.398256', 'step': 2855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:21:56.429465', 'step': 2855, 'epoch': 2} {'type': 'loss', 'content': 0.0015720551600679755, 'timestamp': '2025-09-10 02:21:56.453669', 'step': 2856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:56.484715', 'step': 2856, 'epoch': 2} {'type': 'loss', 'content': 0.014717082493007183, 'timestamp': '2025-09-10 02:21:56.488184', 'step': 2857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:56.518013', 'step': 2857, 'epoch': 2} {'type': 'loss', 'content': 0.003454964840784669, 'timestamp': '2025-09-10 02:21:56.525556', 'step': 2858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:56.556246', 'step': 2858, 'epoch': 2} {'type': 'loss', 'content': 0.002767723286524415, 'timestamp': '2025-09-10 02:21:56.566504', 'step': 2859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:56.596507', 'step': 2859, 'epoch': 2} {'type': 'loss', 'content': 0.012848809361457825, 'timestamp': '2025-09-10 02:21:56.624554', 'step': 2860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:56.655309', 'step': 2860, 'epoch': 2} {'type': 'loss', 'content': 0.015174107626080513, 'timestamp': '2025-09-10 02:21:56.663342', 'step': 2861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:21:56.697483', 'step': 2861, 'epoch': 2} {'type': 'loss', 'content': 0.006852737162262201, 'timestamp': '2025-09-10 02:21:56.711212', 'step': 2862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:56.742102', 'step': 2862, 'epoch': 2} {'type': 'loss', 'content': 0.016556836664676666, 'timestamp': '2025-09-10 02:21:56.749989', 'step': 2863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:56.780543', 'step': 2863, 'epoch': 2} {'type': 'loss', 'content': 0.012003665789961815, 'timestamp': '2025-09-10 02:21:56.809224', 'step': 2864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:56.840636', 'step': 2864, 'epoch': 2} {'type': 'loss', 'content': 0.0033726885449141264, 'timestamp': '2025-09-10 02:21:56.846253', 'step': 2865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:56.878932', 'step': 2865, 'epoch': 2} {'type': 'loss', 'content': 0.001546733663417399, 'timestamp': '2025-09-10 02:21:56.891219', 'step': 2866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:56.925077', 'step': 2866, 'epoch': 2} {'type': 'loss', 'content': 0.018309442326426506, 'timestamp': '2025-09-10 02:21:56.937635', 'step': 2867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:21:56.976009', 'step': 2867, 'epoch': 2} {'type': 'loss', 'content': 0.005003686994314194, 'timestamp': '2025-09-10 02:21:57.011500', 'step': 2868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:57.050458', 'step': 2868, 'epoch': 2} {'type': 'loss', 'content': 0.01586066372692585, 'timestamp': '2025-09-10 02:21:57.058502', 'step': 2869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:57.094025', 'step': 2869, 'epoch': 2} {'type': 'loss', 'content': 0.022168749943375587, 'timestamp': '2025-09-10 02:21:57.101730', 'step': 2870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:21:57.145405', 'step': 2870, 'epoch': 2} {'type': 'loss', 'content': 0.011965368874371052, 'timestamp': '2025-09-10 02:21:57.161292', 'step': 2871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:57.202765', 'step': 2871, 'epoch': 2} {'type': 'loss', 'content': 0.0080100167542696, 'timestamp': '2025-09-10 02:21:57.230608', 'step': 2872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:21:57.269690', 'step': 2872, 'epoch': 2} {'type': 'loss', 'content': 0.004915738943964243, 'timestamp': '2025-09-10 02:21:57.282756', 'step': 2873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:21:57.324127', 'step': 2873, 'epoch': 2} {'type': 'loss', 'content': 0.036590684205293655, 'timestamp': '2025-09-10 02:21:57.337518', 'step': 2874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:57.378714', 'step': 2874, 'epoch': 2} {'type': 'loss', 'content': 0.016932787373661995, 'timestamp': '2025-09-10 02:21:57.383194', 'step': 2875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:57.421402', 'step': 2875, 'epoch': 2} {'type': 'loss', 'content': 0.002477414207533002, 'timestamp': '2025-09-10 02:21:57.453426', 'step': 2876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:57.493622', 'step': 2876, 'epoch': 2} {'type': 'loss', 'content': 0.009233239106833935, 'timestamp': '2025-09-10 02:21:57.497109', 'step': 2877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:57.531367', 'step': 2877, 'epoch': 2} {'type': 'loss', 'content': 0.004387423861771822, 'timestamp': '2025-09-10 02:21:57.538838', 'step': 2878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:57.576986', 'step': 2878, 'epoch': 2} {'type': 'loss', 'content': 0.0022044102661311626, 'timestamp': '2025-09-10 02:21:57.581193', 'step': 2879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:57.620656', 'step': 2879, 'epoch': 2} {'type': 'loss', 'content': 0.003344293450936675, 'timestamp': '2025-09-10 02:21:57.653820', 'step': 2880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:57.688571', 'step': 2880, 'epoch': 2} {'type': 'loss', 'content': 0.0033048386685550213, 'timestamp': '2025-09-10 02:21:57.693903', 'step': 2881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:21:57.729127', 'step': 2881, 'epoch': 2} {'type': 'loss', 'content': 0.013956844806671143, 'timestamp': '2025-09-10 02:21:57.733657', 'step': 2882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:57.772898', 'step': 2882, 'epoch': 2} {'type': 'loss', 'content': 0.017580043524503708, 'timestamp': '2025-09-10 02:21:57.780655', 'step': 2883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:57.823983', 'step': 2883, 'epoch': 2} {'type': 'loss', 'content': 0.006970468442887068, 'timestamp': '2025-09-10 02:21:57.852000', 'step': 2884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:57.892963', 'step': 2884, 'epoch': 2} {'type': 'loss', 'content': 0.002934870542958379, 'timestamp': '2025-09-10 02:21:57.901055', 'step': 2885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:57.938969', 'step': 2885, 'epoch': 2} {'type': 'loss', 'content': 0.0011385561665520072, 'timestamp': '2025-09-10 02:21:57.946648', 'step': 2886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:57.999025', 'step': 2886, 'epoch': 2} {'type': 'loss', 'content': 0.010981320403516293, 'timestamp': '2025-09-10 02:21:58.010261', 'step': 2887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:58.064350', 'step': 2887, 'epoch': 2} {'type': 'loss', 'content': 0.01800290308892727, 'timestamp': '2025-09-10 02:21:58.096941', 'step': 2888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:58.139621', 'step': 2888, 'epoch': 2} {'type': 'loss', 'content': 0.00524926045909524, 'timestamp': '2025-09-10 02:21:58.147001', 'step': 2889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:21:58.205833', 'step': 2889, 'epoch': 2} {'type': 'loss', 'content': 0.004338169004768133, 'timestamp': '2025-09-10 02:21:58.223203', 'step': 2890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:58.268415', 'step': 2890, 'epoch': 2} {'type': 'loss', 'content': 0.01495091337710619, 'timestamp': '2025-09-10 02:21:58.275632', 'step': 2891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:58.305991', 'step': 2891, 'epoch': 2} {'type': 'loss', 'content': 0.0038837611209601164, 'timestamp': '2025-09-10 02:21:58.331474', 'step': 2892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 688], 'flops': 20408222954560}, 'timestamp': '2025-09-10 02:21:58.386481', 'step': 2892, 'epoch': 2} {'type': 'loss', 'content': 0.001870313542895019, 'timestamp': '2025-09-10 02:21:58.410780', 'step': 2893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:21:58.441343', 'step': 2893, 'epoch': 2} {'type': 'loss', 'content': 0.0013324370374903083, 'timestamp': '2025-09-10 02:21:58.444028', 'step': 2894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:58.474641', 'step': 2894, 'epoch': 2} {'type': 'loss', 'content': 0.003104160074144602, 'timestamp': '2025-09-10 02:21:58.482522', 'step': 2895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:58.514154', 'step': 2895, 'epoch': 2} {'type': 'loss', 'content': 0.0025289487093687057, 'timestamp': '2025-09-10 02:21:58.542545', 'step': 2896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:21:58.573421', 'step': 2896, 'epoch': 2} {'type': 'loss', 'content': 0.0017517339438199997, 'timestamp': '2025-09-10 02:21:58.575491', 'step': 2897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:58.606654', 'step': 2897, 'epoch': 2} {'type': 'loss', 'content': 0.004530445206910372, 'timestamp': '2025-09-10 02:21:58.618786', 'step': 2898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:21:58.650642', 'step': 2898, 'epoch': 2} {'type': 'loss', 'content': 0.0009508281364105642, 'timestamp': '2025-09-10 02:21:58.658545', 'step': 2899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:58.689706', 'step': 2899, 'epoch': 2} {'type': 'loss', 'content': 0.012976233847439289, 'timestamp': '2025-09-10 02:21:58.717747', 'step': 2900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:21:58.754243', 'step': 2900, 'epoch': 2} {'type': 'loss', 'content': 0.002717123832553625, 'timestamp': '2025-09-10 02:21:58.769406', 'step': 2901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:21:58.800524', 'step': 2901, 'epoch': 2} {'type': 'loss', 'content': 0.019545141607522964, 'timestamp': '2025-09-10 02:21:58.807986', 'step': 2902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 560], 'flops': 16611393146432}, 'timestamp': '2025-09-10 02:21:58.854567', 'step': 2902, 'epoch': 2} {'type': 'loss', 'content': 0.0025809798389673233, 'timestamp': '2025-09-10 02:21:58.873979', 'step': 2903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:58.910932', 'step': 2903, 'epoch': 2} {'type': 'loss', 'content': 0.0010793408146128058, 'timestamp': '2025-09-10 02:21:58.939062', 'step': 2904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:58.969530', 'step': 2904, 'epoch': 2} {'type': 'loss', 'content': 0.002281660446897149, 'timestamp': '2025-09-10 02:21:58.977402', 'step': 2905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:59.008417', 'step': 2905, 'epoch': 2} {'type': 'loss', 'content': 0.008847257122397423, 'timestamp': '2025-09-10 02:21:59.015401', 'step': 2906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:59.051422', 'step': 2906, 'epoch': 2} {'type': 'loss', 'content': 0.00565936928614974, 'timestamp': '2025-09-10 02:21:59.055983', 'step': 2907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:21:59.094239', 'step': 2907, 'epoch': 2} {'type': 'loss', 'content': 0.0032904818654060364, 'timestamp': '2025-09-10 02:21:59.127238', 'step': 2908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:59.158629', 'step': 2908, 'epoch': 2} {'type': 'loss', 'content': 0.0015532250981777906, 'timestamp': '2025-09-10 02:21:59.168322', 'step': 2909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:59.200228', 'step': 2909, 'epoch': 2} {'type': 'loss', 'content': 0.0015883222222328186, 'timestamp': '2025-09-10 02:21:59.207380', 'step': 2910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:59.238162', 'step': 2910, 'epoch': 2} {'type': 'loss', 'content': 0.01465687807649374, 'timestamp': '2025-09-10 02:21:59.250384', 'step': 2911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:59.285448', 'step': 2911, 'epoch': 2} {'type': 'loss', 'content': 0.000894768163561821, 'timestamp': '2025-09-10 02:21:59.310633', 'step': 2912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:59.341168', 'step': 2912, 'epoch': 2} {'type': 'loss', 'content': 0.001928298850543797, 'timestamp': '2025-09-10 02:21:59.343617', 'step': 2913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:59.375092', 'step': 2913, 'epoch': 2} {'type': 'loss', 'content': 0.0025657066144049168, 'timestamp': '2025-09-10 02:21:59.379746', 'step': 2914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:21:59.414770', 'step': 2914, 'epoch': 2} {'type': 'loss', 'content': 0.0007942708325572312, 'timestamp': '2025-09-10 02:21:59.428803', 'step': 2915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:59.461407', 'step': 2915, 'epoch': 2} {'type': 'loss', 'content': 0.012047487311065197, 'timestamp': '2025-09-10 02:21:59.486808', 'step': 2916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:21:59.527655', 'step': 2916, 'epoch': 2} {'type': 'loss', 'content': 0.030188219621777534, 'timestamp': '2025-09-10 02:21:59.532533', 'step': 2917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:21:59.570354', 'step': 2917, 'epoch': 2} {'type': 'loss', 'content': 0.02199845388531685, 'timestamp': '2025-09-10 02:21:59.577441', 'step': 2918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:21:59.614234', 'step': 2918, 'epoch': 2} {'type': 'loss', 'content': 0.010863765142858028, 'timestamp': '2025-09-10 02:21:59.625220', 'step': 2919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:21:59.669965', 'step': 2919, 'epoch': 2} {'type': 'loss', 'content': 0.0012042339658364654, 'timestamp': '2025-09-10 02:21:59.704651', 'step': 2920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:59.735315', 'step': 2920, 'epoch': 2} {'type': 'loss', 'content': 0.04105643555521965, 'timestamp': '2025-09-10 02:21:59.737634', 'step': 2921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:21:59.767694', 'step': 2921, 'epoch': 2} {'type': 'loss', 'content': 0.00044288174831308424, 'timestamp': '2025-09-10 02:21:59.770211', 'step': 2922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:21:59.800706', 'step': 2922, 'epoch': 2} {'type': 'loss', 'content': 0.006732792127877474, 'timestamp': '2025-09-10 02:21:59.811614', 'step': 2923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:21:59.841546', 'step': 2923, 'epoch': 2} {'type': 'loss', 'content': 0.0017994015943259, 'timestamp': '2025-09-10 02:21:59.869331', 'step': 2924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:21:59.900936', 'step': 2924, 'epoch': 2} {'type': 'loss', 'content': 0.0027029775083065033, 'timestamp': '2025-09-10 02:21:59.910460', 'step': 2925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:21:59.956229', 'step': 2925, 'epoch': 2} {'type': 'loss', 'content': 0.005239599384367466, 'timestamp': '2025-09-10 02:21:59.960170', 'step': 2926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:21:59.994080', 'step': 2926, 'epoch': 2} {'type': 'loss', 'content': 0.006697875447571278, 'timestamp': '2025-09-10 02:22:00.004375', 'step': 2927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:00.038069', 'step': 2927, 'epoch': 2} {'type': 'loss', 'content': 0.03148489445447922, 'timestamp': '2025-09-10 02:22:00.063041', 'step': 2928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:00.094406', 'step': 2928, 'epoch': 2} {'type': 'loss', 'content': 0.020124191418290138, 'timestamp': '2025-09-10 02:22:00.099254', 'step': 2929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:22:00.140652', 'step': 2929, 'epoch': 2} {'type': 'loss', 'content': 0.010933955200016499, 'timestamp': '2025-09-10 02:22:00.157710', 'step': 2930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:00.190255', 'step': 2930, 'epoch': 2} {'type': 'loss', 'content': 0.013913453556597233, 'timestamp': '2025-09-10 02:22:00.202665', 'step': 2931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:00.234784', 'step': 2931, 'epoch': 2} {'type': 'loss', 'content': 0.003610015381127596, 'timestamp': '2025-09-10 02:22:00.262635', 'step': 2932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:00.293763', 'step': 2932, 'epoch': 2} {'type': 'loss', 'content': 0.001940641668625176, 'timestamp': '2025-09-10 02:22:00.298029', 'step': 2933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:00.330128', 'step': 2933, 'epoch': 2} {'type': 'loss', 'content': 0.0037323671858757734, 'timestamp': '2025-09-10 02:22:00.340931', 'step': 2934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:22:00.374918', 'step': 2934, 'epoch': 2} {'type': 'loss', 'content': 0.01665830798447132, 'timestamp': '2025-09-10 02:22:00.388319', 'step': 2935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:00.419887', 'step': 2935, 'epoch': 2} {'type': 'loss', 'content': 0.005614429712295532, 'timestamp': '2025-09-10 02:22:00.447847', 'step': 2936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:00.479376', 'step': 2936, 'epoch': 2} {'type': 'loss', 'content': 0.0027582976035773754, 'timestamp': '2025-09-10 02:22:00.487368', 'step': 2937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:00.518110', 'step': 2937, 'epoch': 2} {'type': 'loss', 'content': 0.003742832690477371, 'timestamp': '2025-09-10 02:22:00.525908', 'step': 2938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:00.556958', 'step': 2938, 'epoch': 2} {'type': 'loss', 'content': 0.039867255836725235, 'timestamp': '2025-09-10 02:22:00.561533', 'step': 2939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:00.593247', 'step': 2939, 'epoch': 2} {'type': 'loss', 'content': 0.0062867277301847935, 'timestamp': '2025-09-10 02:22:00.618271', 'step': 2940, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:22:10.925419', 'step': 2940, 'epoch': 2} {'type': 'pplx', 'content': 20450711.8035112, 'timestamp': '2025-09-10 02:22:10.928876', 'step': 2940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:10.958777', 'step': 2940, 'epoch': 2} {'type': 'loss', 'content': 0.00988749973475933, 'timestamp': '2025-09-10 02:22:10.967420', 'step': 2941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:10.999190', 'step': 2941, 'epoch': 2} {'type': 'loss', 'content': 0.0008435306954197586, 'timestamp': '2025-09-10 02:22:11.006091', 'step': 2942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:11.036991', 'step': 2942, 'epoch': 2} {'type': 'loss', 'content': 0.0046732001937925816, 'timestamp': '2025-09-10 02:22:11.047700', 'step': 2943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:11.079822', 'step': 2943, 'epoch': 2} {'type': 'loss', 'content': 0.01026154775172472, 'timestamp': '2025-09-10 02:22:11.107510', 'step': 2944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:11.138368', 'step': 2944, 'epoch': 2} {'type': 'loss', 'content': 0.0024215218145400286, 'timestamp': '2025-09-10 02:22:11.143757', 'step': 2945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 816], 'flops': 24205052762688}, 'timestamp': '2025-09-10 02:22:11.212348', 'step': 2945, 'epoch': 2} {'type': 'loss', 'content': 0.000555099977646023, 'timestamp': '2025-09-10 02:22:11.240841', 'step': 2946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:11.270823', 'step': 2946, 'epoch': 2} {'type': 'loss', 'content': 0.003919025417417288, 'timestamp': '2025-09-10 02:22:11.278728', 'step': 2947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:11.309682', 'step': 2947, 'epoch': 2} {'type': 'loss', 'content': 0.0007654453511349857, 'timestamp': '2025-09-10 02:22:11.340662', 'step': 2948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:11.373858', 'step': 2948, 'epoch': 2} {'type': 'loss', 'content': 0.007311842869967222, 'timestamp': '2025-09-10 02:22:11.383725', 'step': 2949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:11.414295', 'step': 2949, 'epoch': 2} {'type': 'loss', 'content': 0.0015561177860945463, 'timestamp': '2025-09-10 02:22:11.418689', 'step': 2950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:11.449620', 'step': 2950, 'epoch': 2} {'type': 'loss', 'content': 0.003131111618131399, 'timestamp': '2025-09-10 02:22:11.457383', 'step': 2951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:11.487810', 'step': 2951, 'epoch': 2} {'type': 'loss', 'content': 0.004036908969283104, 'timestamp': '2025-09-10 02:22:11.513200', 'step': 2952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:22:11.546861', 'step': 2952, 'epoch': 2} {'type': 'loss', 'content': 0.003508640918880701, 'timestamp': '2025-09-10 02:22:11.560205', 'step': 2953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:11.590549', 'step': 2953, 'epoch': 2} {'type': 'loss', 'content': 0.005527927540242672, 'timestamp': '2025-09-10 02:22:11.597845', 'step': 2954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:11.630810', 'step': 2954, 'epoch': 2} {'type': 'loss', 'content': 0.013634276576340199, 'timestamp': '2025-09-10 02:22:11.638282', 'step': 2955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:11.669259', 'step': 2955, 'epoch': 2} {'type': 'loss', 'content': 0.004053633194416761, 'timestamp': '2025-09-10 02:22:11.697820', 'step': 2956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:11.728460', 'step': 2956, 'epoch': 2} {'type': 'loss', 'content': 0.004635666497051716, 'timestamp': '2025-09-10 02:22:11.733519', 'step': 2957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:11.763929', 'step': 2957, 'epoch': 2} {'type': 'loss', 'content': 0.001686741947196424, 'timestamp': '2025-09-10 02:22:11.770938', 'step': 2958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:11.802254', 'step': 2958, 'epoch': 2} {'type': 'loss', 'content': 0.013765445910394192, 'timestamp': '2025-09-10 02:22:11.809208', 'step': 2959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:11.839864', 'step': 2959, 'epoch': 2} {'type': 'loss', 'content': 0.0005294339498504996, 'timestamp': '2025-09-10 02:22:11.872901', 'step': 2960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:11.904098', 'step': 2960, 'epoch': 2} {'type': 'loss', 'content': 0.012235159985721111, 'timestamp': '2025-09-10 02:22:11.914672', 'step': 2961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:22:11.951422', 'step': 2961, 'epoch': 2} {'type': 'loss', 'content': 0.004377015866339207, 'timestamp': '2025-09-10 02:22:11.965222', 'step': 2962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:11.996458', 'step': 2962, 'epoch': 2} {'type': 'loss', 'content': 0.0006293201586231589, 'timestamp': '2025-09-10 02:22:12.003260', 'step': 2963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:12.036249', 'step': 2963, 'epoch': 2} {'type': 'loss', 'content': 0.0035972814075648785, 'timestamp': '2025-09-10 02:22:12.068062', 'step': 2964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:12.107853', 'step': 2964, 'epoch': 2} {'type': 'loss', 'content': 8.064762369031087e-05, 'timestamp': '2025-09-10 02:22:12.110062', 'step': 2965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:12.149792', 'step': 2965, 'epoch': 2} {'type': 'loss', 'content': 0.0006316312937997282, 'timestamp': '2025-09-10 02:22:12.157021', 'step': 2966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:12.189130', 'step': 2966, 'epoch': 2} {'type': 'loss', 'content': 0.0010401438921689987, 'timestamp': '2025-09-10 02:22:12.196795', 'step': 2967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:12.235983', 'step': 2967, 'epoch': 2} {'type': 'loss', 'content': 0.016096774488687515, 'timestamp': '2025-09-10 02:22:12.261540', 'step': 2968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:12.295464', 'step': 2968, 'epoch': 2} {'type': 'loss', 'content': 0.002507053781300783, 'timestamp': '2025-09-10 02:22:12.303329', 'step': 2969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:22:12.337886', 'step': 2969, 'epoch': 2} {'type': 'loss', 'content': 0.0009550213580951095, 'timestamp': '2025-09-10 02:22:12.351270', 'step': 2970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:12.382407', 'step': 2970, 'epoch': 2} {'type': 'loss', 'content': 0.00042923627188429236, 'timestamp': '2025-09-10 02:22:12.394518', 'step': 2971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:12.426545', 'step': 2971, 'epoch': 2} {'type': 'loss', 'content': 0.001259053940884769, 'timestamp': '2025-09-10 02:22:12.451335', 'step': 2972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:22:12.484720', 'step': 2972, 'epoch': 2} {'type': 'loss', 'content': 0.0048403749242424965, 'timestamp': '2025-09-10 02:22:12.497725', 'step': 2973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:12.531127', 'step': 2973, 'epoch': 2} {'type': 'loss', 'content': 0.005875090602785349, 'timestamp': '2025-09-10 02:22:12.538186', 'step': 2974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:12.570319', 'step': 2974, 'epoch': 2} {'type': 'loss', 'content': 0.00015017333498690277, 'timestamp': '2025-09-10 02:22:12.577711', 'step': 2975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:12.608608', 'step': 2975, 'epoch': 2} {'type': 'loss', 'content': 0.01487821340560913, 'timestamp': '2025-09-10 02:22:12.637276', 'step': 2976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:12.670601', 'step': 2976, 'epoch': 2} {'type': 'loss', 'content': 0.0001285710313823074, 'timestamp': '2025-09-10 02:22:12.677586', 'step': 2977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:12.709280', 'step': 2977, 'epoch': 2} {'type': 'loss', 'content': 0.004458011593669653, 'timestamp': '2025-09-10 02:22:12.720274', 'step': 2978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:12.755600', 'step': 2978, 'epoch': 2} {'type': 'loss', 'content': 0.0002914820215664804, 'timestamp': '2025-09-10 02:22:12.760278', 'step': 2979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:12.791212', 'step': 2979, 'epoch': 2} {'type': 'loss', 'content': 0.000645567080937326, 'timestamp': '2025-09-10 02:22:12.819769', 'step': 2980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:22:12.853166', 'step': 2980, 'epoch': 2} {'type': 'loss', 'content': 0.0005663609481416643, 'timestamp': '2025-09-10 02:22:12.855008', 'step': 2981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:12.885874', 'step': 2981, 'epoch': 2} {'type': 'loss', 'content': 0.03028297796845436, 'timestamp': '2025-09-10 02:22:12.892771', 'step': 2982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:12.922939', 'step': 2982, 'epoch': 2} {'type': 'loss', 'content': 0.0005917864036746323, 'timestamp': '2025-09-10 02:22:12.926940', 'step': 2983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:12.959502', 'step': 2983, 'epoch': 2} {'type': 'loss', 'content': 0.001026555197313428, 'timestamp': '2025-09-10 02:22:12.988143', 'step': 2984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:13.028026', 'step': 2984, 'epoch': 2} {'type': 'loss', 'content': 0.0009340193355455995, 'timestamp': '2025-09-10 02:22:13.035431', 'step': 2985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:13.072083', 'step': 2985, 'epoch': 2} {'type': 'loss', 'content': 0.0008202405297197402, 'timestamp': '2025-09-10 02:22:13.079195', 'step': 2986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:13.116266', 'step': 2986, 'epoch': 2} {'type': 'loss', 'content': 0.0028560981154441833, 'timestamp': '2025-09-10 02:22:13.128338', 'step': 2987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:13.169528', 'step': 2987, 'epoch': 2} {'type': 'loss', 'content': 0.00015045542386360466, 'timestamp': '2025-09-10 02:22:13.197543', 'step': 2988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:13.229385', 'step': 2988, 'epoch': 2} {'type': 'loss', 'content': 0.0005772449658252299, 'timestamp': '2025-09-10 02:22:13.236140', 'step': 2989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:13.269514', 'step': 2989, 'epoch': 2} {'type': 'loss', 'content': 0.0008060900145210326, 'timestamp': '2025-09-10 02:22:13.277134', 'step': 2990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:13.311834', 'step': 2990, 'epoch': 2} {'type': 'loss', 'content': 0.006533232517540455, 'timestamp': '2025-09-10 02:22:13.319297', 'step': 2991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:13.354743', 'step': 2991, 'epoch': 2} {'type': 'loss', 'content': 0.0189223550260067, 'timestamp': '2025-09-10 02:22:13.382512', 'step': 2992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:13.414356', 'step': 2992, 'epoch': 2} {'type': 'loss', 'content': 0.0038932212628424168, 'timestamp': '2025-09-10 02:22:13.424102', 'step': 2993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:13.464029', 'step': 2993, 'epoch': 2} {'type': 'loss', 'content': 0.0023500225506722927, 'timestamp': '2025-09-10 02:22:13.471545', 'step': 2994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:22:13.519871', 'step': 2994, 'epoch': 2} {'type': 'loss', 'content': 0.029731089249253273, 'timestamp': '2025-09-10 02:22:13.522461', 'step': 2995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:13.556026', 'step': 2995, 'epoch': 2} {'type': 'loss', 'content': 0.00560992443934083, 'timestamp': '2025-09-10 02:22:13.583643', 'step': 2996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:13.614809', 'step': 2996, 'epoch': 2} {'type': 'loss', 'content': 0.0013980664080008864, 'timestamp': '2025-09-10 02:22:13.625149', 'step': 2997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:13.657053', 'step': 2997, 'epoch': 2} {'type': 'loss', 'content': 0.007363726384937763, 'timestamp': '2025-09-10 02:22:13.669642', 'step': 2998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:13.704091', 'step': 2998, 'epoch': 2} {'type': 'loss', 'content': 0.005370273254811764, 'timestamp': '2025-09-10 02:22:13.711107', 'step': 2999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:13.743172', 'step': 2999, 'epoch': 2} {'type': 'loss', 'content': 0.008254798129200935, 'timestamp': '2025-09-10 02:22:13.771504', 'step': 3000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 3000', 'timestamp': '2025-09-10 02:22:18.507756', 'step': 3000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:18.561073', 'step': 3000, 'epoch': 2} {'type': 'loss', 'content': 0.013195289298892021, 'timestamp': '2025-09-10 02:22:18.568642', 'step': 3001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:18.605389', 'step': 3001, 'epoch': 2} {'type': 'loss', 'content': 0.00990669522434473, 'timestamp': '2025-09-10 02:22:18.609221', 'step': 3002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:18.644904', 'step': 3002, 'epoch': 2} {'type': 'loss', 'content': 0.004560007713735104, 'timestamp': '2025-09-10 02:22:18.649210', 'step': 3003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:18.687567', 'step': 3003, 'epoch': 2} {'type': 'loss', 'content': 0.0024112870451062918, 'timestamp': '2025-09-10 02:22:18.713265', 'step': 3004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:18.749859', 'step': 3004, 'epoch': 2} {'type': 'loss', 'content': 0.0007551188464276493, 'timestamp': '2025-09-10 02:22:18.754752', 'step': 3005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:18.786814', 'step': 3005, 'epoch': 2} {'type': 'loss', 'content': 0.0002144659374607727, 'timestamp': '2025-09-10 02:22:18.793390', 'step': 3006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:22:18.831745', 'step': 3006, 'epoch': 2} {'type': 'loss', 'content': 0.0013251977507025003, 'timestamp': '2025-09-10 02:22:18.847306', 'step': 3007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:18.879089', 'step': 3007, 'epoch': 2} {'type': 'loss', 'content': 0.00027837217203341424, 'timestamp': '2025-09-10 02:22:18.906511', 'step': 3008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:18.939130', 'step': 3008, 'epoch': 2} {'type': 'loss', 'content': 0.0012016237014904618, 'timestamp': '2025-09-10 02:22:18.944273', 'step': 3009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:22:18.982271', 'step': 3009, 'epoch': 2} {'type': 'loss', 'content': 0.016353409737348557, 'timestamp': '2025-09-10 02:22:18.996050', 'step': 3010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:19.034920', 'step': 3010, 'epoch': 2} {'type': 'loss', 'content': 0.025526031851768494, 'timestamp': '2025-09-10 02:22:19.041451', 'step': 3011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:19.077655', 'step': 3011, 'epoch': 2} {'type': 'loss', 'content': 0.007432910148054361, 'timestamp': '2025-09-10 02:22:19.108392', 'step': 3012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:19.146844', 'step': 3012, 'epoch': 2} {'type': 'loss', 'content': 0.0007051877328194678, 'timestamp': '2025-09-10 02:22:19.155911', 'step': 3013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:19.193294', 'step': 3013, 'epoch': 2} {'type': 'loss', 'content': 0.010938274674117565, 'timestamp': '2025-09-10 02:22:19.205424', 'step': 3014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:19.241698', 'step': 3014, 'epoch': 2} {'type': 'loss', 'content': 0.03111579827964306, 'timestamp': '2025-09-10 02:22:19.248729', 'step': 3015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:19.280541', 'step': 3015, 'epoch': 2} {'type': 'loss', 'content': 0.008617566898465157, 'timestamp': '2025-09-10 02:22:19.308351', 'step': 3016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:19.341108', 'step': 3016, 'epoch': 2} {'type': 'loss', 'content': 0.014785193838179111, 'timestamp': '2025-09-10 02:22:19.346261', 'step': 3017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:19.379772', 'step': 3017, 'epoch': 2} {'type': 'loss', 'content': 0.002250525401905179, 'timestamp': '2025-09-10 02:22:19.386866', 'step': 3018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:19.418987', 'step': 3018, 'epoch': 2} {'type': 'loss', 'content': 0.0003496368881314993, 'timestamp': '2025-09-10 02:22:19.426212', 'step': 3019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:19.457212', 'step': 3019, 'epoch': 2} {'type': 'loss', 'content': 0.0003628613776527345, 'timestamp': '2025-09-10 02:22:19.485625', 'step': 3020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:19.521575', 'step': 3020, 'epoch': 2} {'type': 'loss', 'content': 0.0066203526221215725, 'timestamp': '2025-09-10 02:22:19.530594', 'step': 3021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:19.563427', 'step': 3021, 'epoch': 2} {'type': 'loss', 'content': 0.00015238435298670083, 'timestamp': '2025-09-10 02:22:19.570361', 'step': 3022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:19.601647', 'step': 3022, 'epoch': 2} {'type': 'loss', 'content': 0.003497667144984007, 'timestamp': '2025-09-10 02:22:19.608232', 'step': 3023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:19.639837', 'step': 3023, 'epoch': 2} {'type': 'loss', 'content': 0.04256868362426758, 'timestamp': '2025-09-10 02:22:19.671190', 'step': 3024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:19.703218', 'step': 3024, 'epoch': 2} {'type': 'loss', 'content': 0.0011781870853155851, 'timestamp': '2025-09-10 02:22:19.705106', 'step': 3025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:19.736844', 'step': 3025, 'epoch': 2} {'type': 'loss', 'content': 0.00529795978218317, 'timestamp': '2025-09-10 02:22:19.743694', 'step': 3026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:19.774776', 'step': 3026, 'epoch': 2} {'type': 'loss', 'content': 0.011905157007277012, 'timestamp': '2025-09-10 02:22:19.782323', 'step': 3027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:19.814092', 'step': 3027, 'epoch': 2} {'type': 'loss', 'content': 0.0007259399862959981, 'timestamp': '2025-09-10 02:22:19.842214', 'step': 3028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:19.873893', 'step': 3028, 'epoch': 2} {'type': 'loss', 'content': 0.00030881358543410897, 'timestamp': '2025-09-10 02:22:19.881354', 'step': 3029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:19.913353', 'step': 3029, 'epoch': 2} {'type': 'loss', 'content': 0.02642730250954628, 'timestamp': '2025-09-10 02:22:19.920234', 'step': 3030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:22:19.961020', 'step': 3030, 'epoch': 2} {'type': 'loss', 'content': 0.062217261642217636, 'timestamp': '2025-09-10 02:22:19.977252', 'step': 3031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:20.010741', 'step': 3031, 'epoch': 2} {'type': 'loss', 'content': 0.010299092158675194, 'timestamp': '2025-09-10 02:22:20.038290', 'step': 3032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:22:20.069983', 'step': 3032, 'epoch': 2} {'type': 'loss', 'content': 0.0013357808347791433, 'timestamp': '2025-09-10 02:22:20.072354', 'step': 3033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:20.104344', 'step': 3033, 'epoch': 2} {'type': 'loss', 'content': 0.00296528497710824, 'timestamp': '2025-09-10 02:22:20.111120', 'step': 3034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:20.142696', 'step': 3034, 'epoch': 2} {'type': 'loss', 'content': 0.037451110780239105, 'timestamp': '2025-09-10 02:22:20.149596', 'step': 3035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:20.182290', 'step': 3035, 'epoch': 2} {'type': 'loss', 'content': 0.0031133827287703753, 'timestamp': '2025-09-10 02:22:20.210302', 'step': 3036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:22:20.242675', 'step': 3036, 'epoch': 2} {'type': 'loss', 'content': 0.0067618959583342075, 'timestamp': '2025-09-10 02:22:20.255690', 'step': 3037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:20.287885', 'step': 3037, 'epoch': 2} {'type': 'loss', 'content': 0.0028574629686772823, 'timestamp': '2025-09-10 02:22:20.291785', 'step': 3038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:20.322595', 'step': 3038, 'epoch': 2} {'type': 'loss', 'content': 0.0004291358927730471, 'timestamp': '2025-09-10 02:22:20.329540', 'step': 3039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:20.360367', 'step': 3039, 'epoch': 2} {'type': 'loss', 'content': 0.04179126024246216, 'timestamp': '2025-09-10 02:22:20.388995', 'step': 3040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:20.419723', 'step': 3040, 'epoch': 2} {'type': 'loss', 'content': 0.003463194938376546, 'timestamp': '2025-09-10 02:22:20.424416', 'step': 3041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:22:20.462606', 'step': 3041, 'epoch': 2} {'type': 'loss', 'content': 0.04269900918006897, 'timestamp': '2025-09-10 02:22:20.478289', 'step': 3042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:20.510403', 'step': 3042, 'epoch': 2} {'type': 'loss', 'content': 0.008046741597354412, 'timestamp': '2025-09-10 02:22:20.517900', 'step': 3043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:20.548962', 'step': 3043, 'epoch': 2} {'type': 'loss', 'content': 0.00842567440122366, 'timestamp': '2025-09-10 02:22:20.576760', 'step': 3044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:20.608236', 'step': 3044, 'epoch': 2} {'type': 'loss', 'content': 0.003832954214885831, 'timestamp': '2025-09-10 02:22:20.612900', 'step': 3045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:20.643437', 'step': 3045, 'epoch': 2} {'type': 'loss', 'content': 0.0032644220627844334, 'timestamp': '2025-09-10 02:22:20.650686', 'step': 3046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:20.682397', 'step': 3046, 'epoch': 2} {'type': 'loss', 'content': 0.000450856052339077, 'timestamp': '2025-09-10 02:22:20.694639', 'step': 3047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:20.725556', 'step': 3047, 'epoch': 2} {'type': 'loss', 'content': 0.005584734957665205, 'timestamp': '2025-09-10 02:22:20.753421', 'step': 3048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:20.784286', 'step': 3048, 'epoch': 2} {'type': 'loss', 'content': 0.001507714157924056, 'timestamp': '2025-09-10 02:22:20.788909', 'step': 3049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:20.819348', 'step': 3049, 'epoch': 2} {'type': 'loss', 'content': 0.006397690158337355, 'timestamp': '2025-09-10 02:22:20.826389', 'step': 3050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:20.858993', 'step': 3050, 'epoch': 2} {'type': 'loss', 'content': 0.0005857815849594772, 'timestamp': '2025-09-10 02:22:20.866794', 'step': 3051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:20.897758', 'step': 3051, 'epoch': 2} {'type': 'loss', 'content': 0.0018613949650898576, 'timestamp': '2025-09-10 02:22:20.925941', 'step': 3052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:22:20.963332', 'step': 3052, 'epoch': 2} {'type': 'loss', 'content': 0.009978823363780975, 'timestamp': '2025-09-10 02:22:20.979018', 'step': 3053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:21.010775', 'step': 3053, 'epoch': 2} {'type': 'loss', 'content': 0.00379360793158412, 'timestamp': '2025-09-10 02:22:21.021623', 'step': 3054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:21.053164', 'step': 3054, 'epoch': 2} {'type': 'loss', 'content': 0.0018329472513869405, 'timestamp': '2025-09-10 02:22:21.060677', 'step': 3055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:21.091584', 'step': 3055, 'epoch': 2} {'type': 'loss', 'content': 0.014588729478418827, 'timestamp': '2025-09-10 02:22:21.119910', 'step': 3056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:21.150206', 'step': 3056, 'epoch': 2} {'type': 'loss', 'content': 0.032337453216314316, 'timestamp': '2025-09-10 02:22:21.154756', 'step': 3057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:21.187083', 'step': 3057, 'epoch': 2} {'type': 'loss', 'content': 0.0008836713968776166, 'timestamp': '2025-09-10 02:22:21.199654', 'step': 3058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:21.231554', 'step': 3058, 'epoch': 2} {'type': 'loss', 'content': 0.0018349305028095841, 'timestamp': '2025-09-10 02:22:21.238523', 'step': 3059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:21.271396', 'step': 3059, 'epoch': 2} {'type': 'loss', 'content': 0.006054178345948458, 'timestamp': '2025-09-10 02:22:21.303173', 'step': 3060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:21.334981', 'step': 3060, 'epoch': 2} {'type': 'loss', 'content': 0.019653644412755966, 'timestamp': '2025-09-10 02:22:21.340260', 'step': 3061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:21.372363', 'step': 3061, 'epoch': 2} {'type': 'loss', 'content': 0.006816718727350235, 'timestamp': '2025-09-10 02:22:21.384960', 'step': 3062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:21.415562', 'step': 3062, 'epoch': 2} {'type': 'loss', 'content': 0.0038755948189646006, 'timestamp': '2025-09-10 02:22:21.420127', 'step': 3063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:21.450497', 'step': 3063, 'epoch': 2} {'type': 'loss', 'content': 0.0013827037764713168, 'timestamp': '2025-09-10 02:22:21.483511', 'step': 3064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:22:21.518804', 'step': 3064, 'epoch': 2} {'type': 'loss', 'content': 0.010334816761314869, 'timestamp': '2025-09-10 02:22:21.531426', 'step': 3065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:21.564761', 'step': 3065, 'epoch': 2} {'type': 'loss', 'content': 0.011374552734196186, 'timestamp': '2025-09-10 02:22:21.572062', 'step': 3066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:21.605520', 'step': 3066, 'epoch': 2} {'type': 'loss', 'content': 0.006082989741116762, 'timestamp': '2025-09-10 02:22:21.609560', 'step': 3067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:22:21.645832', 'step': 3067, 'epoch': 2} {'type': 'loss', 'content': 0.014264583587646484, 'timestamp': '2025-09-10 02:22:21.680079', 'step': 3068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:22:21.710880', 'step': 3068, 'epoch': 2} {'type': 'loss', 'content': 0.007734424900263548, 'timestamp': '2025-09-10 02:22:21.713191', 'step': 3069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:21.744035', 'step': 3069, 'epoch': 2} {'type': 'loss', 'content': 0.017366407439112663, 'timestamp': '2025-09-10 02:22:21.754021', 'step': 3070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:21.784755', 'step': 3070, 'epoch': 2} {'type': 'loss', 'content': 0.003733862191438675, 'timestamp': '2025-09-10 02:22:21.791537', 'step': 3071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:21.822650', 'step': 3071, 'epoch': 2} {'type': 'loss', 'content': 0.0025246471632272005, 'timestamp': '2025-09-10 02:22:21.850485', 'step': 3072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:21.881411', 'step': 3072, 'epoch': 2} {'type': 'loss', 'content': 0.013509529642760754, 'timestamp': '2025-09-10 02:22:21.889415', 'step': 3073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:22:21.924665', 'step': 3073, 'epoch': 2} {'type': 'loss', 'content': 0.00036960511351935565, 'timestamp': '2025-09-10 02:22:21.938376', 'step': 3074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:21.973789', 'step': 3074, 'epoch': 2} {'type': 'loss', 'content': 0.0023911669850349426, 'timestamp': '2025-09-10 02:22:21.984903', 'step': 3075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:22:22.039465', 'step': 3075, 'epoch': 2} {'type': 'loss', 'content': 0.0024533343967050314, 'timestamp': '2025-09-10 02:22:22.076033', 'step': 3076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:22.113073', 'step': 3076, 'epoch': 2} {'type': 'loss', 'content': 0.0010715676471590996, 'timestamp': '2025-09-10 02:22:22.121327', 'step': 3077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:22.154654', 'step': 3077, 'epoch': 2} {'type': 'loss', 'content': 0.0028496759478002787, 'timestamp': '2025-09-10 02:22:22.161892', 'step': 3078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:22.200186', 'step': 3078, 'epoch': 2} {'type': 'loss', 'content': 0.009175264276564121, 'timestamp': '2025-09-10 02:22:22.211068', 'step': 3079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:22.250256', 'step': 3079, 'epoch': 2} {'type': 'loss', 'content': 0.0040994067676365376, 'timestamp': '2025-09-10 02:22:22.278185', 'step': 3080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:22.309139', 'step': 3080, 'epoch': 2} {'type': 'loss', 'content': 0.005873729009181261, 'timestamp': '2025-09-10 02:22:22.313893', 'step': 3081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:22.345022', 'step': 3081, 'epoch': 2} {'type': 'loss', 'content': 0.004864770919084549, 'timestamp': '2025-09-10 02:22:22.349484', 'step': 3082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:22.380778', 'step': 3082, 'epoch': 2} {'type': 'loss', 'content': 0.0015795464860275388, 'timestamp': '2025-09-10 02:22:22.384862', 'step': 3083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:22.417242', 'step': 3083, 'epoch': 2} {'type': 'loss', 'content': 0.0015763568226248026, 'timestamp': '2025-09-10 02:22:22.442602', 'step': 3084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:22.474120', 'step': 3084, 'epoch': 2} {'type': 'loss', 'content': 0.003129825461655855, 'timestamp': '2025-09-10 02:22:22.476517', 'step': 3085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:22:22.514847', 'step': 3085, 'epoch': 2} {'type': 'loss', 'content': 0.0005250798421911895, 'timestamp': '2025-09-10 02:22:22.530777', 'step': 3086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:22.561937', 'step': 3086, 'epoch': 2} {'type': 'loss', 'content': 0.003833092050626874, 'timestamp': '2025-09-10 02:22:22.568814', 'step': 3087, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:22:33.022514', 'step': 3087, 'epoch': 2} {'type': 'pplx', 'content': 21153755.598216124, 'timestamp': '2025-09-10 02:22:33.029278', 'step': 3087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:33.064348', 'step': 3087, 'epoch': 2} {'type': 'loss', 'content': 0.0009679818176664412, 'timestamp': '2025-09-10 02:22:33.096215', 'step': 3088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:33.136391', 'step': 3088, 'epoch': 2} {'type': 'loss', 'content': 0.006355203688144684, 'timestamp': '2025-09-10 02:22:33.141075', 'step': 3089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:33.179377', 'step': 3089, 'epoch': 2} {'type': 'loss', 'content': 0.0007879887707531452, 'timestamp': '2025-09-10 02:22:33.189546', 'step': 3090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:33.227835', 'step': 3090, 'epoch': 2} {'type': 'loss', 'content': 0.011465544812381268, 'timestamp': '2025-09-10 02:22:33.233963', 'step': 3091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:33.267909', 'step': 3091, 'epoch': 2} {'type': 'loss', 'content': 0.022248754277825356, 'timestamp': '2025-09-10 02:22:33.295620', 'step': 3092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:22:33.328598', 'step': 3092, 'epoch': 2} {'type': 'loss', 'content': 0.025890696793794632, 'timestamp': '2025-09-10 02:22:33.340896', 'step': 3093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:22:33.378892', 'step': 3093, 'epoch': 2} {'type': 'loss', 'content': 0.001566907623782754, 'timestamp': '2025-09-10 02:22:33.394508', 'step': 3094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:33.427731', 'step': 3094, 'epoch': 2} {'type': 'loss', 'content': 0.0018820820841938257, 'timestamp': '2025-09-10 02:22:33.435036', 'step': 3095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:33.466008', 'step': 3095, 'epoch': 2} {'type': 'loss', 'content': 0.0020010853186249733, 'timestamp': '2025-09-10 02:22:33.494127', 'step': 3096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:22:33.525651', 'step': 3096, 'epoch': 2} {'type': 'loss', 'content': 0.0019767414778470993, 'timestamp': '2025-09-10 02:22:33.528109', 'step': 3097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:33.562476', 'step': 3097, 'epoch': 2} {'type': 'loss', 'content': 0.007151364348828793, 'timestamp': '2025-09-10 02:22:33.566486', 'step': 3098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:33.599120', 'step': 3098, 'epoch': 2} {'type': 'loss', 'content': 0.0023438245989382267, 'timestamp': '2025-09-10 02:22:33.605795', 'step': 3099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:33.637566', 'step': 3099, 'epoch': 2} {'type': 'loss', 'content': 0.0004773031105287373, 'timestamp': '2025-09-10 02:22:33.662185', 'step': 3100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:33.693484', 'step': 3100, 'epoch': 2} {'type': 'loss', 'content': 0.004419370554387569, 'timestamp': '2025-09-10 02:22:33.695734', 'step': 3101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:33.726823', 'step': 3101, 'epoch': 2} {'type': 'loss', 'content': 0.0012314915657043457, 'timestamp': '2025-09-10 02:22:33.731053', 'step': 3102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:33.765637', 'step': 3102, 'epoch': 2} {'type': 'loss', 'content': 0.007404958363622427, 'timestamp': '2025-09-10 02:22:33.777847', 'step': 3103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:33.809233', 'step': 3103, 'epoch': 2} {'type': 'loss', 'content': 0.002475725719705224, 'timestamp': '2025-09-10 02:22:33.836868', 'step': 3104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:33.869402', 'step': 3104, 'epoch': 2} {'type': 'loss', 'content': 0.024621224030852318, 'timestamp': '2025-09-10 02:22:33.876148', 'step': 3105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:33.907176', 'step': 3105, 'epoch': 2} {'type': 'loss', 'content': 0.006465516518801451, 'timestamp': '2025-09-10 02:22:33.913998', 'step': 3106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:22:33.947508', 'step': 3106, 'epoch': 2} {'type': 'loss', 'content': 0.0004534423351287842, 'timestamp': '2025-09-10 02:22:33.960891', 'step': 3107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:33.993962', 'step': 3107, 'epoch': 2} {'type': 'loss', 'content': 0.0025858450680971146, 'timestamp': '2025-09-10 02:22:34.022012', 'step': 3108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:34.053224', 'step': 3108, 'epoch': 2} {'type': 'loss', 'content': 0.0024823250714689493, 'timestamp': '2025-09-10 02:22:34.057775', 'step': 3109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:34.089247', 'step': 3109, 'epoch': 2} {'type': 'loss', 'content': 0.004186380188912153, 'timestamp': '2025-09-10 02:22:34.099417', 'step': 3110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:34.131364', 'step': 3110, 'epoch': 2} {'type': 'loss', 'content': 0.0007520094513893127, 'timestamp': '2025-09-10 02:22:34.141494', 'step': 3111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:34.172972', 'step': 3111, 'epoch': 2} {'type': 'loss', 'content': 0.00030438616522587836, 'timestamp': '2025-09-10 02:22:34.201555', 'step': 3112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:34.232666', 'step': 3112, 'epoch': 2} {'type': 'loss', 'content': 0.00505801709368825, 'timestamp': '2025-09-10 02:22:34.234997', 'step': 3113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:34.266322', 'step': 3113, 'epoch': 2} {'type': 'loss', 'content': 0.0030636286828666925, 'timestamp': '2025-09-10 02:22:34.273511', 'step': 3114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:22:34.314929', 'step': 3114, 'epoch': 2} {'type': 'loss', 'content': 0.039970513433218, 'timestamp': '2025-09-10 02:22:34.332216', 'step': 3115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:34.364789', 'step': 3115, 'epoch': 2} {'type': 'loss', 'content': 0.012349791824817657, 'timestamp': '2025-09-10 02:22:34.397696', 'step': 3116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:34.428527', 'step': 3116, 'epoch': 2} {'type': 'loss', 'content': 0.0034343355800956488, 'timestamp': '2025-09-10 02:22:34.433349', 'step': 3117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:34.463675', 'step': 3117, 'epoch': 2} {'type': 'loss', 'content': 0.011677572503685951, 'timestamp': '2025-09-10 02:22:34.470737', 'step': 3118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:34.502989', 'step': 3118, 'epoch': 2} {'type': 'loss', 'content': 0.0023600461427122355, 'timestamp': '2025-09-10 02:22:34.515486', 'step': 3119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:22:34.550914', 'step': 3119, 'epoch': 2} {'type': 'loss', 'content': 0.0012422216823324561, 'timestamp': '2025-09-10 02:22:34.585815', 'step': 3120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:34.616858', 'step': 3120, 'epoch': 2} {'type': 'loss', 'content': 0.005536832381039858, 'timestamp': '2025-09-10 02:22:34.621216', 'step': 3121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:34.651904', 'step': 3121, 'epoch': 2} {'type': 'loss', 'content': 0.003003006335347891, 'timestamp': '2025-09-10 02:22:34.658901', 'step': 3122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:34.689307', 'step': 3122, 'epoch': 2} {'type': 'loss', 'content': 0.0019031567499041557, 'timestamp': '2025-09-10 02:22:34.696479', 'step': 3123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:22:34.734404', 'step': 3123, 'epoch': 2} {'type': 'loss', 'content': 0.0015222270740196109, 'timestamp': '2025-09-10 02:22:34.770870', 'step': 3124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:34.805056', 'step': 3124, 'epoch': 2} {'type': 'loss', 'content': 0.003712509525939822, 'timestamp': '2025-09-10 02:22:34.809662', 'step': 3125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:22:34.840686', 'step': 3125, 'epoch': 2} {'type': 'loss', 'content': 0.0011844148393720388, 'timestamp': '2025-09-10 02:22:34.843120', 'step': 3126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:34.874283', 'step': 3126, 'epoch': 2} {'type': 'loss', 'content': 0.009678156115114689, 'timestamp': '2025-09-10 02:22:34.885863', 'step': 3127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:34.917147', 'step': 3127, 'epoch': 2} {'type': 'loss', 'content': 0.001543865422718227, 'timestamp': '2025-09-10 02:22:34.945213', 'step': 3128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:34.975336', 'step': 3128, 'epoch': 2} {'type': 'loss', 'content': 0.0022532050497829914, 'timestamp': '2025-09-10 02:22:34.977838', 'step': 3129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:35.009158', 'step': 3129, 'epoch': 2} {'type': 'loss', 'content': 0.0002928555477410555, 'timestamp': '2025-09-10 02:22:35.021490', 'step': 3130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:35.062819', 'step': 3130, 'epoch': 2} {'type': 'loss', 'content': 0.0005914249341003597, 'timestamp': '2025-09-10 02:22:35.070289', 'step': 3131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:35.115911', 'step': 3131, 'epoch': 2} {'type': 'loss', 'content': 0.0007928982959128916, 'timestamp': '2025-09-10 02:22:35.144531', 'step': 3132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:35.184890', 'step': 3132, 'epoch': 2} {'type': 'loss', 'content': 0.0026510064490139484, 'timestamp': '2025-09-10 02:22:35.193172', 'step': 3133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:35.234127', 'step': 3133, 'epoch': 2} {'type': 'loss', 'content': 0.004251073580235243, 'timestamp': '2025-09-10 02:22:35.241154', 'step': 3134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:35.275464', 'step': 3134, 'epoch': 2} {'type': 'loss', 'content': 0.003887306433171034, 'timestamp': '2025-09-10 02:22:35.286274', 'step': 3135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:35.319472', 'step': 3135, 'epoch': 2} {'type': 'loss', 'content': 0.008226651698350906, 'timestamp': '2025-09-10 02:22:35.347308', 'step': 3136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:35.377446', 'step': 3136, 'epoch': 2} {'type': 'loss', 'content': 0.013611420057713985, 'timestamp': '2025-09-10 02:22:35.381890', 'step': 3137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:35.414659', 'step': 3137, 'epoch': 2} {'type': 'loss', 'content': 0.0004120334633626044, 'timestamp': '2025-09-10 02:22:35.421833', 'step': 3138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:22:35.458490', 'step': 3138, 'epoch': 2} {'type': 'loss', 'content': 0.002287933137267828, 'timestamp': '2025-09-10 02:22:35.472204', 'step': 3139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:35.506202', 'step': 3139, 'epoch': 2} {'type': 'loss', 'content': 0.00017298969032708555, 'timestamp': '2025-09-10 02:22:35.533829', 'step': 3140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:22:35.570686', 'step': 3140, 'epoch': 2} {'type': 'loss', 'content': 0.00024141445464920253, 'timestamp': '2025-09-10 02:22:35.586110', 'step': 3141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:35.617668', 'step': 3141, 'epoch': 2} {'type': 'loss', 'content': 0.0008984781452454627, 'timestamp': '2025-09-10 02:22:35.629397', 'step': 3142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:22:35.669036', 'step': 3142, 'epoch': 2} {'type': 'loss', 'content': 0.010877908207476139, 'timestamp': '2025-09-10 02:22:35.684699', 'step': 3143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:35.716267', 'step': 3143, 'epoch': 2} {'type': 'loss', 'content': 0.018387990072369576, 'timestamp': '2025-09-10 02:22:35.743904', 'step': 3144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:22:35.777546', 'step': 3144, 'epoch': 2} {'type': 'loss', 'content': 0.0019596496131271124, 'timestamp': '2025-09-10 02:22:35.790640', 'step': 3145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:35.822410', 'step': 3145, 'epoch': 2} {'type': 'loss', 'content': 0.00041655570385046303, 'timestamp': '2025-09-10 02:22:35.829175', 'step': 3146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:35.861912', 'step': 3146, 'epoch': 2} {'type': 'loss', 'content': 0.0006447642226703465, 'timestamp': '2025-09-10 02:22:35.868664', 'step': 3147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 15662185694400}, 'timestamp': '2025-09-10 02:22:35.915220', 'step': 3147, 'epoch': 2} {'type': 'loss', 'content': 0.0039002900011837482, 'timestamp': '2025-09-10 02:22:35.955086', 'step': 3148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:35.987428', 'step': 3148, 'epoch': 2} {'type': 'loss', 'content': 0.0022350053768604994, 'timestamp': '2025-09-10 02:22:35.992181', 'step': 3149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:36.023626', 'step': 3149, 'epoch': 2} {'type': 'loss', 'content': 0.005409142933785915, 'timestamp': '2025-09-10 02:22:36.030476', 'step': 3150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:36.062820', 'step': 3150, 'epoch': 2} {'type': 'loss', 'content': 0.0011333615984767675, 'timestamp': '2025-09-10 02:22:36.070256', 'step': 3151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:36.100823', 'step': 3151, 'epoch': 2} {'type': 'loss', 'content': 0.0002690895344130695, 'timestamp': '2025-09-10 02:22:36.128651', 'step': 3152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:22:36.161113', 'step': 3152, 'epoch': 2} {'type': 'loss', 'content': 0.017261261120438576, 'timestamp': '2025-09-10 02:22:36.173823', 'step': 3153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:22:36.208430', 'step': 3153, 'epoch': 2} {'type': 'loss', 'content': 0.0018687748815864325, 'timestamp': '2025-09-10 02:22:36.222132', 'step': 3154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:36.255541', 'step': 3154, 'epoch': 2} {'type': 'loss', 'content': 0.0018482712330296636, 'timestamp': '2025-09-10 02:22:36.262692', 'step': 3155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:36.293994', 'step': 3155, 'epoch': 2} {'type': 'loss', 'content': 0.0002486660669092089, 'timestamp': '2025-09-10 02:22:36.322466', 'step': 3156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:22:36.353796', 'step': 3156, 'epoch': 2} {'type': 'loss', 'content': 0.0037758280523121357, 'timestamp': '2025-09-10 02:22:36.355847', 'step': 3157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:22:36.389754', 'step': 3157, 'epoch': 2} {'type': 'loss', 'content': 0.0007112511666491628, 'timestamp': '2025-09-10 02:22:36.392450', 'step': 3158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:36.423967', 'step': 3158, 'epoch': 2} {'type': 'loss', 'content': 0.000233599086641334, 'timestamp': '2025-09-10 02:22:36.436441', 'step': 3159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:22:36.470084', 'step': 3159, 'epoch': 2} {'type': 'loss', 'content': 0.00017964192375075072, 'timestamp': '2025-09-10 02:22:36.493663', 'step': 3160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:22:36.526932', 'step': 3160, 'epoch': 2} {'type': 'loss', 'content': 0.004801702219992876, 'timestamp': '2025-09-10 02:22:36.540081', 'step': 3161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:36.584674', 'step': 3161, 'epoch': 2} {'type': 'loss', 'content': 0.020199043676257133, 'timestamp': '2025-09-10 02:22:36.591734', 'step': 3162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:36.625173', 'step': 3162, 'epoch': 2} {'type': 'loss', 'content': 0.0010675977682694793, 'timestamp': '2025-09-10 02:22:36.636048', 'step': 3163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:36.673457', 'step': 3163, 'epoch': 2} {'type': 'loss', 'content': 0.0003448014031164348, 'timestamp': '2025-09-10 02:22:36.701119', 'step': 3164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:36.734990', 'step': 3164, 'epoch': 2} {'type': 'loss', 'content': 0.00020471213792916387, 'timestamp': '2025-09-10 02:22:36.739144', 'step': 3165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:36.787514', 'step': 3165, 'epoch': 2} {'type': 'loss', 'content': 0.0005103556322865188, 'timestamp': '2025-09-10 02:22:36.794029', 'step': 3166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:22:36.827768', 'step': 3166, 'epoch': 2} {'type': 'loss', 'content': 0.002306754468008876, 'timestamp': '2025-09-10 02:22:36.830155', 'step': 3167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:36.863788', 'step': 3167, 'epoch': 2} {'type': 'loss', 'content': 0.0009209056152030826, 'timestamp': '2025-09-10 02:22:36.891997', 'step': 3168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:22:36.925434', 'step': 3168, 'epoch': 2} {'type': 'loss', 'content': 0.00017630930233281106, 'timestamp': '2025-09-10 02:22:36.938464', 'step': 3169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:36.969216', 'step': 3169, 'epoch': 2} {'type': 'loss', 'content': 0.0032021531369537115, 'timestamp': '2025-09-10 02:22:36.976342', 'step': 3170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:37.008954', 'step': 3170, 'epoch': 2} {'type': 'loss', 'content': 0.00039557431591674685, 'timestamp': '2025-09-10 02:22:37.013179', 'step': 3171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:37.043748', 'step': 3171, 'epoch': 2} {'type': 'loss', 'content': 0.0010184214916080236, 'timestamp': '2025-09-10 02:22:37.074953', 'step': 3172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:37.105520', 'step': 3172, 'epoch': 2} {'type': 'loss', 'content': 0.00031294874497689307, 'timestamp': '2025-09-10 02:22:37.113472', 'step': 3173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:22:37.149177', 'step': 3173, 'epoch': 2} {'type': 'loss', 'content': 0.0008610020158812404, 'timestamp': '2025-09-10 02:22:37.162901', 'step': 3174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:22:37.197818', 'step': 3174, 'epoch': 2} {'type': 'loss', 'content': 0.0015279522631317377, 'timestamp': '2025-09-10 02:22:37.211216', 'step': 3175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:37.242396', 'step': 3175, 'epoch': 2} {'type': 'loss', 'content': 0.010398059152066708, 'timestamp': '2025-09-10 02:22:37.273537', 'step': 3176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:37.304358', 'step': 3176, 'epoch': 2} {'type': 'loss', 'content': 0.0012553682317957282, 'timestamp': '2025-09-10 02:22:37.306608', 'step': 3177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:37.337888', 'step': 3177, 'epoch': 2} {'type': 'loss', 'content': 0.0010120292427018285, 'timestamp': '2025-09-10 02:22:37.344761', 'step': 3178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:37.375351', 'step': 3178, 'epoch': 2} {'type': 'loss', 'content': 0.003787730587646365, 'timestamp': '2025-09-10 02:22:37.383017', 'step': 3179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:22:37.413647', 'step': 3179, 'epoch': 2} {'type': 'loss', 'content': 0.013505556620657444, 'timestamp': '2025-09-10 02:22:37.437644', 'step': 3180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:37.468195', 'step': 3180, 'epoch': 2} {'type': 'loss', 'content': 0.003877087030559778, 'timestamp': '2025-09-10 02:22:37.470460', 'step': 3181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:37.501096', 'step': 3181, 'epoch': 2} {'type': 'loss', 'content': 0.00047005919623188674, 'timestamp': '2025-09-10 02:22:37.511281', 'step': 3182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:37.541506', 'step': 3182, 'epoch': 2} {'type': 'loss', 'content': 0.0027071668300777674, 'timestamp': '2025-09-10 02:22:37.545685', 'step': 3183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:37.581163', 'step': 3183, 'epoch': 2} {'type': 'loss', 'content': 0.0015775591600686312, 'timestamp': '2025-09-10 02:22:37.606764', 'step': 3184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:37.637287', 'step': 3184, 'epoch': 2} {'type': 'loss', 'content': 0.001980002736672759, 'timestamp': '2025-09-10 02:22:37.642280', 'step': 3185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:37.673723', 'step': 3185, 'epoch': 2} {'type': 'loss', 'content': 0.0013841536128893495, 'timestamp': '2025-09-10 02:22:37.685883', 'step': 3186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:37.718254', 'step': 3186, 'epoch': 2} {'type': 'loss', 'content': 0.0009568403474986553, 'timestamp': '2025-09-10 02:22:37.725579', 'step': 3187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:37.757250', 'step': 3187, 'epoch': 2} {'type': 'loss', 'content': 0.0486009381711483, 'timestamp': '2025-09-10 02:22:37.785068', 'step': 3188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:37.817158', 'step': 3188, 'epoch': 2} {'type': 'loss', 'content': 0.008468760177493095, 'timestamp': '2025-09-10 02:22:37.826681', 'step': 3189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:37.858796', 'step': 3189, 'epoch': 2} {'type': 'loss', 'content': 0.004440871067345142, 'timestamp': '2025-09-10 02:22:37.868534', 'step': 3190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:37.899339', 'step': 3190, 'epoch': 2} {'type': 'loss', 'content': 0.039386093616485596, 'timestamp': '2025-09-10 02:22:37.906455', 'step': 3191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:37.937573', 'step': 3191, 'epoch': 2} {'type': 'loss', 'content': 0.003340385155752301, 'timestamp': '2025-09-10 02:22:37.965889', 'step': 3192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:37.997269', 'step': 3192, 'epoch': 2} {'type': 'loss', 'content': 0.004237064626067877, 'timestamp': '2025-09-10 02:22:38.002480', 'step': 3193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:38.034109', 'step': 3193, 'epoch': 2} {'type': 'loss', 'content': 0.010632021352648735, 'timestamp': '2025-09-10 02:22:38.045869', 'step': 3194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:22:38.084477', 'step': 3194, 'epoch': 2} {'type': 'loss', 'content': 0.0007325800834223628, 'timestamp': '2025-09-10 02:22:38.100425', 'step': 3195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:38.131176', 'step': 3195, 'epoch': 2} {'type': 'loss', 'content': 5.4420535889221355e-05, 'timestamp': '2025-09-10 02:22:38.156710', 'step': 3196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:38.189240', 'step': 3196, 'epoch': 2} {'type': 'loss', 'content': 0.000553667254280299, 'timestamp': '2025-09-10 02:22:38.193935', 'step': 3197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:22:38.235326', 'step': 3197, 'epoch': 2} {'type': 'loss', 'content': 0.02702312171459198, 'timestamp': '2025-09-10 02:22:38.252626', 'step': 3198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:38.283741', 'step': 3198, 'epoch': 2} {'type': 'loss', 'content': 0.000169062870554626, 'timestamp': '2025-09-10 02:22:38.290898', 'step': 3199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:22:38.322708', 'step': 3199, 'epoch': 2} {'type': 'loss', 'content': 0.0018326956778764725, 'timestamp': '2025-09-10 02:22:38.346260', 'step': 3200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:38.377001', 'step': 3200, 'epoch': 2} {'type': 'loss', 'content': 0.00025423362967558205, 'timestamp': '2025-09-10 02:22:38.382396', 'step': 3201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:22:38.422092', 'step': 3201, 'epoch': 2} {'type': 'loss', 'content': 0.00021461385767906904, 'timestamp': '2025-09-10 02:22:38.438228', 'step': 3202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:22:38.473102', 'step': 3202, 'epoch': 2} {'type': 'loss', 'content': 0.0002993656671606004, 'timestamp': '2025-09-10 02:22:38.486818', 'step': 3203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:22:38.522162', 'step': 3203, 'epoch': 2} {'type': 'loss', 'content': 0.0004899102495983243, 'timestamp': '2025-09-10 02:22:38.557079', 'step': 3204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:38.587494', 'step': 3204, 'epoch': 2} {'type': 'loss', 'content': 0.0012291016755625606, 'timestamp': '2025-09-10 02:22:38.589920', 'step': 3205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:38.620619', 'step': 3205, 'epoch': 2} {'type': 'loss', 'content': 0.01229874137789011, 'timestamp': '2025-09-10 02:22:38.624935', 'step': 3206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:38.656016', 'step': 3206, 'epoch': 2} {'type': 'loss', 'content': 0.0004454140434972942, 'timestamp': '2025-09-10 02:22:38.666814', 'step': 3207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:38.707752', 'step': 3207, 'epoch': 2} {'type': 'loss', 'content': 0.00010696732351789251, 'timestamp': '2025-09-10 02:22:38.735577', 'step': 3208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:38.766238', 'step': 3208, 'epoch': 2} {'type': 'loss', 'content': 0.0006230357685126364, 'timestamp': '2025-09-10 02:22:38.774174', 'step': 3209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:22:38.816179', 'step': 3209, 'epoch': 2} {'type': 'loss', 'content': 0.0018624786753207445, 'timestamp': '2025-09-10 02:22:38.833519', 'step': 3210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:22:38.872502', 'step': 3210, 'epoch': 2} {'type': 'loss', 'content': 0.0004688594490289688, 'timestamp': '2025-09-10 02:22:38.888433', 'step': 3211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:38.920488', 'step': 3211, 'epoch': 2} {'type': 'loss', 'content': 0.0005589794600382447, 'timestamp': '2025-09-10 02:22:38.947973', 'step': 3212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:38.980672', 'step': 3212, 'epoch': 2} {'type': 'loss', 'content': 0.001154548255726695, 'timestamp': '2025-09-10 02:22:38.988108', 'step': 3213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:39.018998', 'step': 3213, 'epoch': 2} {'type': 'loss', 'content': 0.023087533190846443, 'timestamp': '2025-09-10 02:22:39.030941', 'step': 3214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:39.064641', 'step': 3214, 'epoch': 2} {'type': 'loss', 'content': 0.000464627897599712, 'timestamp': '2025-09-10 02:22:39.071635', 'step': 3215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:39.104297', 'step': 3215, 'epoch': 2} {'type': 'loss', 'content': 0.00038381904596462846, 'timestamp': '2025-09-10 02:22:39.132049', 'step': 3216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:22:39.165104', 'step': 3216, 'epoch': 2} {'type': 'loss', 'content': 0.001965318340808153, 'timestamp': '2025-09-10 02:22:39.178420', 'step': 3217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:22:39.211715', 'step': 3217, 'epoch': 2} {'type': 'loss', 'content': 0.0007221942069008946, 'timestamp': '2025-09-10 02:22:39.225098', 'step': 3218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:39.255813', 'step': 3218, 'epoch': 2} {'type': 'loss', 'content': 0.0005813446477986872, 'timestamp': '2025-09-10 02:22:39.262704', 'step': 3219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:39.293375', 'step': 3219, 'epoch': 2} {'type': 'loss', 'content': 0.0018721247324720025, 'timestamp': '2025-09-10 02:22:39.318543', 'step': 3220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:39.349428', 'step': 3220, 'epoch': 2} {'type': 'loss', 'content': 0.0010139280930161476, 'timestamp': '2025-09-10 02:22:39.353948', 'step': 3221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:39.384772', 'step': 3221, 'epoch': 2} {'type': 'loss', 'content': 0.0010682783322408795, 'timestamp': '2025-09-10 02:22:39.395352', 'step': 3222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:39.427058', 'step': 3222, 'epoch': 2} {'type': 'loss', 'content': 0.014338502660393715, 'timestamp': '2025-09-10 02:22:39.433513', 'step': 3223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:39.465088', 'step': 3223, 'epoch': 2} {'type': 'loss', 'content': 0.01097325049340725, 'timestamp': '2025-09-10 02:22:39.493428', 'step': 3224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:39.525266', 'step': 3224, 'epoch': 2} {'type': 'loss', 'content': 0.00029258467839099467, 'timestamp': '2025-09-10 02:22:39.529639', 'step': 3225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:39.561053', 'step': 3225, 'epoch': 2} {'type': 'loss', 'content': 0.00042427852167747915, 'timestamp': '2025-09-10 02:22:39.567975', 'step': 3226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:39.600915', 'step': 3226, 'epoch': 2} {'type': 'loss', 'content': 0.0023932938929647207, 'timestamp': '2025-09-10 02:22:39.605326', 'step': 3227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:39.637730', 'step': 3227, 'epoch': 2} {'type': 'loss', 'content': 0.0120490537956357, 'timestamp': '2025-09-10 02:22:39.666437', 'step': 3228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:22:39.698073', 'step': 3228, 'epoch': 2} {'type': 'loss', 'content': 0.0008032119949348271, 'timestamp': '2025-09-10 02:22:39.700494', 'step': 3229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:39.732562', 'step': 3229, 'epoch': 2} {'type': 'loss', 'content': 0.0006666731787845492, 'timestamp': '2025-09-10 02:22:39.739746', 'step': 3230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:22:39.774952', 'step': 3230, 'epoch': 2} {'type': 'loss', 'content': 0.01671520434319973, 'timestamp': '2025-09-10 02:22:39.788296', 'step': 3231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:39.839699', 'step': 3231, 'epoch': 2} {'type': 'loss', 'content': 0.0014305815566331148, 'timestamp': '2025-09-10 02:22:39.867652', 'step': 3232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:39.899367', 'step': 3232, 'epoch': 2} {'type': 'loss', 'content': 0.0004569535667542368, 'timestamp': '2025-09-10 02:22:39.904099', 'step': 3233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:39.934344', 'step': 3233, 'epoch': 2} {'type': 'loss', 'content': 0.00045302906073629856, 'timestamp': '2025-09-10 02:22:39.938752', 'step': 3234, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:22:49.945100', 'step': 3234, 'epoch': 2} {'type': 'pplx', 'content': 23502743.08364132, 'timestamp': '2025-09-10 02:22:49.948126', 'step': 3234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:49.978817', 'step': 3234, 'epoch': 2} {'type': 'loss', 'content': 0.001856299233622849, 'timestamp': '2025-09-10 02:22:49.982354', 'step': 3235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:50.013341', 'step': 3235, 'epoch': 2} {'type': 'loss', 'content': 0.000491947284899652, 'timestamp': '2025-09-10 02:22:50.043579', 'step': 3236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:50.073820', 'step': 3236, 'epoch': 2} {'type': 'loss', 'content': 0.0004294110112823546, 'timestamp': '2025-09-10 02:22:50.078443', 'step': 3237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:50.108498', 'step': 3237, 'epoch': 2} {'type': 'loss', 'content': 0.0006372739444486797, 'timestamp': '2025-09-10 02:22:50.121008', 'step': 3238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:50.152654', 'step': 3238, 'epoch': 2} {'type': 'loss', 'content': 0.0037969518452882767, 'timestamp': '2025-09-10 02:22:50.163174', 'step': 3239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:50.193750', 'step': 3239, 'epoch': 2} {'type': 'loss', 'content': 0.0004036373575218022, 'timestamp': '2025-09-10 02:22:50.224719', 'step': 3240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:50.254784', 'step': 3240, 'epoch': 2} {'type': 'loss', 'content': 0.0003846238541882485, 'timestamp': '2025-09-10 02:22:50.259442', 'step': 3241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:50.289815', 'step': 3241, 'epoch': 2} {'type': 'loss', 'content': 0.02323022112250328, 'timestamp': '2025-09-10 02:22:50.300221', 'step': 3242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:50.331983', 'step': 3242, 'epoch': 2} {'type': 'loss', 'content': 0.0003217768098693341, 'timestamp': '2025-09-10 02:22:50.339433', 'step': 3243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 784], 'flops': 23255845310656}, 'timestamp': '2025-09-10 02:22:50.404351', 'step': 3243, 'epoch': 2} {'type': 'loss', 'content': 0.0018644100055098534, 'timestamp': '2025-09-10 02:22:50.452450', 'step': 3244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:50.483616', 'step': 3244, 'epoch': 2} {'type': 'loss', 'content': 0.001525462488643825, 'timestamp': '2025-09-10 02:22:50.493622', 'step': 3245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:50.528687', 'step': 3245, 'epoch': 2} {'type': 'loss', 'content': 0.0013417869340628386, 'timestamp': '2025-09-10 02:22:50.532530', 'step': 3246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:50.563727', 'step': 3246, 'epoch': 2} {'type': 'loss', 'content': 0.0008608666248619556, 'timestamp': '2025-09-10 02:22:50.574306', 'step': 3247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:50.605993', 'step': 3247, 'epoch': 2} {'type': 'loss', 'content': 0.00041623544530011714, 'timestamp': '2025-09-10 02:22:50.636683', 'step': 3248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:22:50.672229', 'step': 3248, 'epoch': 2} {'type': 'loss', 'content': 0.0026598642580211163, 'timestamp': '2025-09-10 02:22:50.688083', 'step': 3249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:22:50.722087', 'step': 3249, 'epoch': 2} {'type': 'loss', 'content': 0.0033846586011350155, 'timestamp': '2025-09-10 02:22:50.724450', 'step': 3250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:50.760297', 'step': 3250, 'epoch': 2} {'type': 'loss', 'content': 0.00024376294459216297, 'timestamp': '2025-09-10 02:22:50.766874', 'step': 3251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:50.797846', 'step': 3251, 'epoch': 2} {'type': 'loss', 'content': 0.0008604326867498457, 'timestamp': '2025-09-10 02:22:50.826404', 'step': 3252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:50.859109', 'step': 3252, 'epoch': 2} {'type': 'loss', 'content': 0.0002787252014968544, 'timestamp': '2025-09-10 02:22:50.861302', 'step': 3253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:50.891014', 'step': 3253, 'epoch': 2} {'type': 'loss', 'content': 0.0013136464403942227, 'timestamp': '2025-09-10 02:22:50.896621', 'step': 3254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:50.929511', 'step': 3254, 'epoch': 2} {'type': 'loss', 'content': 0.0002793243620544672, 'timestamp': '2025-09-10 02:22:50.935575', 'step': 3255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:50.968465', 'step': 3255, 'epoch': 2} {'type': 'loss', 'content': 0.0005823360406793654, 'timestamp': '2025-09-10 02:22:50.996851', 'step': 3256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:51.028652', 'step': 3256, 'epoch': 2} {'type': 'loss', 'content': 0.009857832454144955, 'timestamp': '2025-09-10 02:22:51.033616', 'step': 3257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:51.069361', 'step': 3257, 'epoch': 2} {'type': 'loss', 'content': 0.0003967168158851564, 'timestamp': '2025-09-10 02:22:51.073645', 'step': 3258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:51.106717', 'step': 3258, 'epoch': 2} {'type': 'loss', 'content': 0.0002505451557226479, 'timestamp': '2025-09-10 02:22:51.114402', 'step': 3259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:51.148211', 'step': 3259, 'epoch': 2} {'type': 'loss', 'content': 0.0007157556829042733, 'timestamp': '2025-09-10 02:22:51.175925', 'step': 3260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:22:51.216586', 'step': 3260, 'epoch': 2} {'type': 'loss', 'content': 0.00013806803326588124, 'timestamp': '2025-09-10 02:22:51.231738', 'step': 3261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:51.268896', 'step': 3261, 'epoch': 2} {'type': 'loss', 'content': 0.00021779598318971694, 'timestamp': '2025-09-10 02:22:51.278799', 'step': 3262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:51.316727', 'step': 3262, 'epoch': 2} {'type': 'loss', 'content': 0.008616751991212368, 'timestamp': '2025-09-10 02:22:51.320862', 'step': 3263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 14712978242368}, 'timestamp': '2025-09-10 02:22:51.367488', 'step': 3263, 'epoch': 2} {'type': 'loss', 'content': 0.0011451850878074765, 'timestamp': '2025-09-10 02:22:51.406167', 'step': 3264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:22:51.442444', 'step': 3264, 'epoch': 2} {'type': 'loss', 'content': 0.0010292161023244262, 'timestamp': '2025-09-10 02:22:51.455509', 'step': 3265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:51.502272', 'step': 3265, 'epoch': 2} {'type': 'loss', 'content': 0.004708148539066315, 'timestamp': '2025-09-10 02:22:51.506216', 'step': 3266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:51.540036', 'step': 3266, 'epoch': 2} {'type': 'loss', 'content': 0.0003200080245733261, 'timestamp': '2025-09-10 02:22:51.547440', 'step': 3267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:51.597175', 'step': 3267, 'epoch': 2} {'type': 'loss', 'content': 0.0003564673534128815, 'timestamp': '2025-09-10 02:22:51.628072', 'step': 3268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:51.660155', 'step': 3268, 'epoch': 2} {'type': 'loss', 'content': 0.001860837102867663, 'timestamp': '2025-09-10 02:22:51.664785', 'step': 3269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:51.698093', 'step': 3269, 'epoch': 2} {'type': 'loss', 'content': 0.0005392608582042158, 'timestamp': '2025-09-10 02:22:51.703230', 'step': 3270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:51.742970', 'step': 3270, 'epoch': 2} {'type': 'loss', 'content': 0.0032175458036363125, 'timestamp': '2025-09-10 02:22:51.750416', 'step': 3271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:51.783389', 'step': 3271, 'epoch': 2} {'type': 'loss', 'content': 0.009787830524146557, 'timestamp': '2025-09-10 02:22:51.811842', 'step': 3272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:51.842319', 'step': 3272, 'epoch': 2} {'type': 'loss', 'content': 0.0008516389061696827, 'timestamp': '2025-09-10 02:22:51.844388', 'step': 3273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:51.878358', 'step': 3273, 'epoch': 2} {'type': 'loss', 'content': 0.0036891864147037268, 'timestamp': '2025-09-10 02:22:51.882661', 'step': 3274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:51.916027', 'step': 3274, 'epoch': 2} {'type': 'loss', 'content': 0.00042585088522173464, 'timestamp': '2025-09-10 02:22:51.924005', 'step': 3275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:51.954832', 'step': 3275, 'epoch': 2} {'type': 'loss', 'content': 0.0004672827199101448, 'timestamp': '2025-09-10 02:22:51.983679', 'step': 3276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:52.015837', 'step': 3276, 'epoch': 2} {'type': 'loss', 'content': 0.00011190387886017561, 'timestamp': '2025-09-10 02:22:52.020512', 'step': 3277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:52.054019', 'step': 3277, 'epoch': 2} {'type': 'loss', 'content': 0.0003633495362009853, 'timestamp': '2025-09-10 02:22:52.060849', 'step': 3278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:52.094823', 'step': 3278, 'epoch': 2} {'type': 'loss', 'content': 0.0030928533524274826, 'timestamp': '2025-09-10 02:22:52.107417', 'step': 3279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:52.140009', 'step': 3279, 'epoch': 2} {'type': 'loss', 'content': 0.0003447837952990085, 'timestamp': '2025-09-10 02:22:52.172284', 'step': 3280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:52.207809', 'step': 3280, 'epoch': 2} {'type': 'loss', 'content': 0.0003286560531705618, 'timestamp': '2025-09-10 02:22:52.210106', 'step': 3281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:52.240739', 'step': 3281, 'epoch': 2} {'type': 'loss', 'content': 0.0026785004884004593, 'timestamp': '2025-09-10 02:22:52.251175', 'step': 3282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:22:52.287644', 'step': 3282, 'epoch': 2} {'type': 'loss', 'content': 0.004041456617414951, 'timestamp': '2025-09-10 02:22:52.301581', 'step': 3283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:52.333684', 'step': 3283, 'epoch': 2} {'type': 'loss', 'content': 0.004617027007043362, 'timestamp': '2025-09-10 02:22:52.361945', 'step': 3284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:52.393199', 'step': 3284, 'epoch': 2} {'type': 'loss', 'content': 0.0017622795421630144, 'timestamp': '2025-09-10 02:22:52.398639', 'step': 3285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:52.429068', 'step': 3285, 'epoch': 2} {'type': 'loss', 'content': 0.0006238414789550006, 'timestamp': '2025-09-10 02:22:52.439567', 'step': 3286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:52.470208', 'step': 3286, 'epoch': 2} {'type': 'loss', 'content': 0.005539960693567991, 'timestamp': '2025-09-10 02:22:52.474511', 'step': 3287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:52.504593', 'step': 3287, 'epoch': 2} {'type': 'loss', 'content': 0.0032464484684169292, 'timestamp': '2025-09-10 02:22:52.533337', 'step': 3288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:52.563908', 'step': 3288, 'epoch': 2} {'type': 'loss', 'content': 0.0018969980301335454, 'timestamp': '2025-09-10 02:22:52.572641', 'step': 3289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:52.603471', 'step': 3289, 'epoch': 2} {'type': 'loss', 'content': 0.0011059996904805303, 'timestamp': '2025-09-10 02:22:52.607788', 'step': 3290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:52.639390', 'step': 3290, 'epoch': 2} {'type': 'loss', 'content': 0.009358406998217106, 'timestamp': '2025-09-10 02:22:52.651643', 'step': 3291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:52.683118', 'step': 3291, 'epoch': 2} {'type': 'loss', 'content': 0.0013368077343329787, 'timestamp': '2025-09-10 02:22:52.711542', 'step': 3292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:52.742817', 'step': 3292, 'epoch': 2} {'type': 'loss', 'content': 0.013290916569530964, 'timestamp': '2025-09-10 02:22:52.744954', 'step': 3293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:52.776523', 'step': 3293, 'epoch': 2} {'type': 'loss', 'content': 0.004001455847173929, 'timestamp': '2025-09-10 02:22:52.787595', 'step': 3294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:52.818335', 'step': 3294, 'epoch': 2} {'type': 'loss', 'content': 0.00019497517496347427, 'timestamp': '2025-09-10 02:22:52.822799', 'step': 3295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:52.855538', 'step': 3295, 'epoch': 2} {'type': 'loss', 'content': 0.0005138739361427724, 'timestamp': '2025-09-10 02:22:52.883393', 'step': 3296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:52.914958', 'step': 3296, 'epoch': 2} {'type': 'loss', 'content': 0.008893569000065327, 'timestamp': '2025-09-10 02:22:52.920142', 'step': 3297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:22:52.959182', 'step': 3297, 'epoch': 2} {'type': 'loss', 'content': 0.0008259877795353532, 'timestamp': '2025-09-10 02:22:52.975036', 'step': 3298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:53.005523', 'step': 3298, 'epoch': 2} {'type': 'loss', 'content': 0.00016621073882561177, 'timestamp': '2025-09-10 02:22:53.013103', 'step': 3299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:53.046542', 'step': 3299, 'epoch': 2} {'type': 'loss', 'content': 0.002620183164253831, 'timestamp': '2025-09-10 02:22:53.075158', 'step': 3300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:53.105095', 'step': 3300, 'epoch': 2} {'type': 'loss', 'content': 0.0029605585150420666, 'timestamp': '2025-09-10 02:22:53.107245', 'step': 3301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:53.137003', 'step': 3301, 'epoch': 2} {'type': 'loss', 'content': 0.0031393535900861025, 'timestamp': '2025-09-10 02:22:53.143954', 'step': 3302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:53.174948', 'step': 3302, 'epoch': 2} {'type': 'loss', 'content': 0.002589694457128644, 'timestamp': '2025-09-10 02:22:53.182628', 'step': 3303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:53.212926', 'step': 3303, 'epoch': 2} {'type': 'loss', 'content': 0.0028403718024492264, 'timestamp': '2025-09-10 02:22:53.241438', 'step': 3304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:53.271110', 'step': 3304, 'epoch': 2} {'type': 'loss', 'content': 0.000695100927259773, 'timestamp': '2025-09-10 02:22:53.275923', 'step': 3305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:53.306170', 'step': 3305, 'epoch': 2} {'type': 'loss', 'content': 0.012687050737440586, 'timestamp': '2025-09-10 02:22:53.313990', 'step': 3306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:53.343830', 'step': 3306, 'epoch': 2} {'type': 'loss', 'content': 0.03439050540328026, 'timestamp': '2025-09-10 02:22:53.350885', 'step': 3307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:53.381182', 'step': 3307, 'epoch': 2} {'type': 'loss', 'content': 0.02955719642341137, 'timestamp': '2025-09-10 02:22:53.409822', 'step': 3308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:53.441083', 'step': 3308, 'epoch': 2} {'type': 'loss', 'content': 0.0008426779531873763, 'timestamp': '2025-09-10 02:22:53.449713', 'step': 3309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:53.480771', 'step': 3309, 'epoch': 2} {'type': 'loss', 'content': 0.014351406134665012, 'timestamp': '2025-09-10 02:22:53.488153', 'step': 3310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:53.519052', 'step': 3310, 'epoch': 2} {'type': 'loss', 'content': 0.0006407542387023568, 'timestamp': '2025-09-10 02:22:53.529339', 'step': 3311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:22:53.562755', 'step': 3311, 'epoch': 2} {'type': 'loss', 'content': 0.013966037891805172, 'timestamp': '2025-09-10 02:22:53.597025', 'step': 3312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:53.627652', 'step': 3312, 'epoch': 2} {'type': 'loss', 'content': 0.0011689442908391356, 'timestamp': '2025-09-10 02:22:53.635469', 'step': 3313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:53.665966', 'step': 3313, 'epoch': 2} {'type': 'loss', 'content': 0.0012120773317292333, 'timestamp': '2025-09-10 02:22:53.670069', 'step': 3314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:53.701243', 'step': 3314, 'epoch': 2} {'type': 'loss', 'content': 0.011072760447859764, 'timestamp': '2025-09-10 02:22:53.705361', 'step': 3315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:22:53.735480', 'step': 3315, 'epoch': 2} {'type': 'loss', 'content': 0.01113554835319519, 'timestamp': '2025-09-10 02:22:53.759070', 'step': 3316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:53.789020', 'step': 3316, 'epoch': 2} {'type': 'loss', 'content': 0.0005071196937933564, 'timestamp': '2025-09-10 02:22:53.791168', 'step': 3317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:53.821706', 'step': 3317, 'epoch': 2} {'type': 'loss', 'content': 0.015270305797457695, 'timestamp': '2025-09-10 02:22:53.829189', 'step': 3318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:22:53.863710', 'step': 3318, 'epoch': 2} {'type': 'loss', 'content': 0.003202088875696063, 'timestamp': '2025-09-10 02:22:53.877698', 'step': 3319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:53.908537', 'step': 3319, 'epoch': 2} {'type': 'loss', 'content': 0.06514207273721695, 'timestamp': '2025-09-10 02:22:53.941350', 'step': 3320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:22:53.973867', 'step': 3320, 'epoch': 2} {'type': 'loss', 'content': 0.00015472178347408772, 'timestamp': '2025-09-10 02:22:53.987005', 'step': 3321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:54.017702', 'step': 3321, 'epoch': 2} {'type': 'loss', 'content': 0.023015392944216728, 'timestamp': '2025-09-10 02:22:54.024625', 'step': 3322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:54.054972', 'step': 3322, 'epoch': 2} {'type': 'loss', 'content': 0.020949339494109154, 'timestamp': '2025-09-10 02:22:54.059051', 'step': 3323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:54.089059', 'step': 3323, 'epoch': 2} {'type': 'loss', 'content': 0.0001148775772890076, 'timestamp': '2025-09-10 02:22:54.116873', 'step': 3324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:54.148560', 'step': 3324, 'epoch': 2} {'type': 'loss', 'content': 0.0014125898014754057, 'timestamp': '2025-09-10 02:22:54.159196', 'step': 3325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 576], 'flops': 17085996872448}, 'timestamp': '2025-09-10 02:22:54.210053', 'step': 3325, 'epoch': 2} {'type': 'loss', 'content': 0.0030126813799142838, 'timestamp': '2025-09-10 02:22:54.229548', 'step': 3326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:54.271396', 'step': 3326, 'epoch': 2} {'type': 'loss', 'content': 0.005848866421729326, 'timestamp': '2025-09-10 02:22:54.281730', 'step': 3327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:54.316933', 'step': 3327, 'epoch': 2} {'type': 'loss', 'content': 0.0003858902200590819, 'timestamp': '2025-09-10 02:22:54.343095', 'step': 3328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:54.377475', 'step': 3328, 'epoch': 2} {'type': 'loss', 'content': 0.0009587566019035876, 'timestamp': '2025-09-10 02:22:54.382978', 'step': 3329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:22:54.422472', 'step': 3329, 'epoch': 2} {'type': 'loss', 'content': 0.0006397636607289314, 'timestamp': '2025-09-10 02:22:54.436228', 'step': 3330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:22:54.466890', 'step': 3330, 'epoch': 2} {'type': 'loss', 'content': 0.0004409528919495642, 'timestamp': '2025-09-10 02:22:54.470871', 'step': 3331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:54.502681', 'step': 3331, 'epoch': 2} {'type': 'loss', 'content': 0.06097668781876564, 'timestamp': '2025-09-10 02:22:54.527627', 'step': 3332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:54.559173', 'step': 3332, 'epoch': 2} {'type': 'loss', 'content': 0.0003068390360567719, 'timestamp': '2025-09-10 02:22:54.564065', 'step': 3333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:54.599518', 'step': 3333, 'epoch': 2} {'type': 'loss', 'content': 0.04409003257751465, 'timestamp': '2025-09-10 02:22:54.603863', 'step': 3334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:22:54.642867', 'step': 3334, 'epoch': 2} {'type': 'loss', 'content': 0.008692757226526737, 'timestamp': '2025-09-10 02:22:54.646571', 'step': 3335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:54.678746', 'step': 3335, 'epoch': 2} {'type': 'loss', 'content': 0.031811974942684174, 'timestamp': '2025-09-10 02:22:54.704331', 'step': 3336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:54.736636', 'step': 3336, 'epoch': 2} {'type': 'loss', 'content': 0.01294754259288311, 'timestamp': '2025-09-10 02:22:54.744616', 'step': 3337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:54.776905', 'step': 3337, 'epoch': 2} {'type': 'loss', 'content': 0.00024945108452811837, 'timestamp': '2025-09-10 02:22:54.784345', 'step': 3338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:54.819269', 'step': 3338, 'epoch': 2} {'type': 'loss', 'content': 0.003305058693513274, 'timestamp': '2025-09-10 02:22:54.829892', 'step': 3339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:54.860214', 'step': 3339, 'epoch': 2} {'type': 'loss', 'content': 0.0017841076478362083, 'timestamp': '2025-09-10 02:22:54.885431', 'step': 3340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:54.917395', 'step': 3340, 'epoch': 2} {'type': 'loss', 'content': 0.000721210555639118, 'timestamp': '2025-09-10 02:22:54.922181', 'step': 3341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:22:54.954585', 'step': 3341, 'epoch': 2} {'type': 'loss', 'content': 0.0028600546065717936, 'timestamp': '2025-09-10 02:22:54.956462', 'step': 3342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:22:54.986851', 'step': 3342, 'epoch': 2} {'type': 'loss', 'content': 0.04634520411491394, 'timestamp': '2025-09-10 02:22:54.990315', 'step': 3343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:55.020295', 'step': 3343, 'epoch': 2} {'type': 'loss', 'content': 0.012477157637476921, 'timestamp': '2025-09-10 02:22:55.048779', 'step': 3344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:22:55.080169', 'step': 3344, 'epoch': 2} {'type': 'loss', 'content': 0.0029694880358874798, 'timestamp': '2025-09-10 02:22:55.092968', 'step': 3345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:55.123580', 'step': 3345, 'epoch': 2} {'type': 'loss', 'content': 0.0003129235119558871, 'timestamp': '2025-09-10 02:22:55.127885', 'step': 3346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:55.158608', 'step': 3346, 'epoch': 2} {'type': 'loss', 'content': 0.009798407554626465, 'timestamp': '2025-09-10 02:22:55.170856', 'step': 3347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:55.201318', 'step': 3347, 'epoch': 2} {'type': 'loss', 'content': 0.0179388877004385, 'timestamp': '2025-09-10 02:22:55.228995', 'step': 3348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:55.259186', 'step': 3348, 'epoch': 2} {'type': 'loss', 'content': 0.008463481441140175, 'timestamp': '2025-09-10 02:22:55.264411', 'step': 3349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:55.295248', 'step': 3349, 'epoch': 2} {'type': 'loss', 'content': 0.0007734844111837447, 'timestamp': '2025-09-10 02:22:55.302578', 'step': 3350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:55.333607', 'step': 3350, 'epoch': 2} {'type': 'loss', 'content': 0.07793903350830078, 'timestamp': '2025-09-10 02:22:55.337998', 'step': 3351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:55.367868', 'step': 3351, 'epoch': 2} {'type': 'loss', 'content': 0.00045389196020551026, 'timestamp': '2025-09-10 02:22:55.396596', 'step': 3352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:55.426948', 'step': 3352, 'epoch': 2} {'type': 'loss', 'content': 0.01835579052567482, 'timestamp': '2025-09-10 02:22:55.432266', 'step': 3353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:55.462789', 'step': 3353, 'epoch': 2} {'type': 'loss', 'content': 0.0028381929732859135, 'timestamp': '2025-09-10 02:22:55.474970', 'step': 3354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:55.505419', 'step': 3354, 'epoch': 2} {'type': 'loss', 'content': 0.001101338886655867, 'timestamp': '2025-09-10 02:22:55.512718', 'step': 3355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:55.542884', 'step': 3355, 'epoch': 2} {'type': 'loss', 'content': 0.006561249028891325, 'timestamp': '2025-09-10 02:22:55.571365', 'step': 3356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:55.601655', 'step': 3356, 'epoch': 2} {'type': 'loss', 'content': 0.0058148703537881374, 'timestamp': '2025-09-10 02:22:55.612048', 'step': 3357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:22:55.642061', 'step': 3357, 'epoch': 2} {'type': 'loss', 'content': 0.0029688451904803514, 'timestamp': '2025-09-10 02:22:55.644840', 'step': 3358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:22:55.675408', 'step': 3358, 'epoch': 2} {'type': 'loss', 'content': 0.01831236109137535, 'timestamp': '2025-09-10 02:22:55.687572', 'step': 3359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:55.717698', 'step': 3359, 'epoch': 2} {'type': 'loss', 'content': 0.040482617914676666, 'timestamp': '2025-09-10 02:22:55.742493', 'step': 3360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:55.774544', 'step': 3360, 'epoch': 2} {'type': 'loss', 'content': 0.021457521244883537, 'timestamp': '2025-09-10 02:22:55.782464', 'step': 3361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:55.813342', 'step': 3361, 'epoch': 2} {'type': 'loss', 'content': 0.000866633839905262, 'timestamp': '2025-09-10 02:22:55.823534', 'step': 3362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:55.853993', 'step': 3362, 'epoch': 2} {'type': 'loss', 'content': 0.011205156333744526, 'timestamp': '2025-09-10 02:22:55.861347', 'step': 3363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:55.891354', 'step': 3363, 'epoch': 2} {'type': 'loss', 'content': 0.0007767033530399203, 'timestamp': '2025-09-10 02:22:55.916428', 'step': 3364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:55.946368', 'step': 3364, 'epoch': 2} {'type': 'loss', 'content': 0.002113510388880968, 'timestamp': '2025-09-10 02:22:55.951161', 'step': 3365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:22:55.983402', 'step': 3365, 'epoch': 2} {'type': 'loss', 'content': 0.01282955426722765, 'timestamp': '2025-09-10 02:22:55.990965', 'step': 3366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:22:56.022097', 'step': 3366, 'epoch': 2} {'type': 'loss', 'content': 0.011836833320558071, 'timestamp': '2025-09-10 02:22:56.028672', 'step': 3367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:22:56.059408', 'step': 3367, 'epoch': 2} {'type': 'loss', 'content': 0.010014675557613373, 'timestamp': '2025-09-10 02:22:56.090323', 'step': 3368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:22:56.119616', 'step': 3368, 'epoch': 2} {'type': 'loss', 'content': 0.004872338380664587, 'timestamp': '2025-09-10 02:22:56.121441', 'step': 3369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:56.152678', 'step': 3369, 'epoch': 2} {'type': 'loss', 'content': 0.007102675270289183, 'timestamp': '2025-09-10 02:22:56.156926', 'step': 3370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:22:56.187059', 'step': 3370, 'epoch': 2} {'type': 'loss', 'content': 0.0036141786258667707, 'timestamp': '2025-09-10 02:22:56.191742', 'step': 3371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:56.222806', 'step': 3371, 'epoch': 2} {'type': 'loss', 'content': 0.0022325122263282537, 'timestamp': '2025-09-10 02:22:56.254610', 'step': 3372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:22:56.286385', 'step': 3372, 'epoch': 2} {'type': 'loss', 'content': 0.027134040370583534, 'timestamp': '2025-09-10 02:22:56.291972', 'step': 3373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:56.322260', 'step': 3373, 'epoch': 2} {'type': 'loss', 'content': 0.011692258529365063, 'timestamp': '2025-09-10 02:22:56.329292', 'step': 3374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:22:56.360309', 'step': 3374, 'epoch': 2} {'type': 'loss', 'content': 0.0016718130791559815, 'timestamp': '2025-09-10 02:22:56.370979', 'step': 3375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:22:56.408658', 'step': 3375, 'epoch': 2} {'type': 'loss', 'content': 0.012171820737421513, 'timestamp': '2025-09-10 02:22:56.445450', 'step': 3376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:22:56.475527', 'step': 3376, 'epoch': 2} {'type': 'loss', 'content': 0.001996230101212859, 'timestamp': '2025-09-10 02:22:56.477359', 'step': 3377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:22:56.506870', 'step': 3377, 'epoch': 2} {'type': 'loss', 'content': 0.00892335269600153, 'timestamp': '2025-09-10 02:22:56.509304', 'step': 3378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:22:56.539112', 'step': 3378, 'epoch': 2} {'type': 'loss', 'content': 0.013574354350566864, 'timestamp': '2025-09-10 02:22:56.546166', 'step': 3379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:22:56.577810', 'step': 3379, 'epoch': 2} {'type': 'loss', 'content': 0.010744870640337467, 'timestamp': '2025-09-10 02:22:56.611237', 'step': 3380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:22:56.651979', 'step': 3380, 'epoch': 2} {'type': 'loss', 'content': 0.012002465315163136, 'timestamp': '2025-09-10 02:22:56.665283', 'step': 3381, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:23:06.754647', 'step': 3381, 'epoch': 2} {'type': 'pplx', 'content': 19503867.643994175, 'timestamp': '2025-09-10 02:23:06.757515', 'step': 3381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:23:06.794649', 'step': 3381, 'epoch': 2} {'type': 'loss', 'content': 0.004555174149572849, 'timestamp': '2025-09-10 02:23:06.810538', 'step': 3382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:06.845093', 'step': 3382, 'epoch': 2} {'type': 'loss', 'content': 0.012583248317241669, 'timestamp': '2025-09-10 02:23:06.858473', 'step': 3383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:06.890181', 'step': 3383, 'epoch': 2} {'type': 'loss', 'content': 0.03392893821001053, 'timestamp': '2025-09-10 02:23:06.922817', 'step': 3384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:23:06.957562', 'step': 3384, 'epoch': 2} {'type': 'loss', 'content': 0.004845472984015942, 'timestamp': '2025-09-10 02:23:06.970855', 'step': 3385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:07.005311', 'step': 3385, 'epoch': 2} {'type': 'loss', 'content': 0.0021397171076387167, 'timestamp': '2025-09-10 02:23:07.015713', 'step': 3386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:07.050165', 'step': 3386, 'epoch': 2} {'type': 'loss', 'content': 0.012018001638352871, 'timestamp': '2025-09-10 02:23:07.059697', 'step': 3387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:07.092906', 'step': 3387, 'epoch': 2} {'type': 'loss', 'content': 0.004577454179525375, 'timestamp': '2025-09-10 02:23:07.117881', 'step': 3388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:23:07.149652', 'step': 3388, 'epoch': 2} {'type': 'loss', 'content': 0.010535781271755695, 'timestamp': '2025-09-10 02:23:07.151782', 'step': 3389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:07.183243', 'step': 3389, 'epoch': 2} {'type': 'loss', 'content': 0.005816023796796799, 'timestamp': '2025-09-10 02:23:07.190017', 'step': 3390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:23:07.221365', 'step': 3390, 'epoch': 2} {'type': 'loss', 'content': 0.00614953925833106, 'timestamp': '2025-09-10 02:23:07.223727', 'step': 3391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:07.255452', 'step': 3391, 'epoch': 2} {'type': 'loss', 'content': 0.003558420343324542, 'timestamp': '2025-09-10 02:23:07.283727', 'step': 3392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:07.314913', 'step': 3392, 'epoch': 2} {'type': 'loss', 'content': 0.005937974434345961, 'timestamp': '2025-09-10 02:23:07.319177', 'step': 3393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:07.351187', 'step': 3393, 'epoch': 2} {'type': 'loss', 'content': 0.01408356986939907, 'timestamp': '2025-09-10 02:23:07.363532', 'step': 3394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:07.394337', 'step': 3394, 'epoch': 2} {'type': 'loss', 'content': 0.0014379842905327678, 'timestamp': '2025-09-10 02:23:07.398046', 'step': 3395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:07.429030', 'step': 3395, 'epoch': 2} {'type': 'loss', 'content': 0.0067471894435584545, 'timestamp': '2025-09-10 02:23:07.454158', 'step': 3396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:07.486942', 'step': 3396, 'epoch': 2} {'type': 'loss', 'content': 0.013919052667915821, 'timestamp': '2025-09-10 02:23:07.495675', 'step': 3397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:23:07.530583', 'step': 3397, 'epoch': 2} {'type': 'loss', 'content': 0.0027160770259797573, 'timestamp': '2025-09-10 02:23:07.544500', 'step': 3398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:07.577910', 'step': 3398, 'epoch': 2} {'type': 'loss', 'content': 0.004795930348336697, 'timestamp': '2025-09-10 02:23:07.587618', 'step': 3399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:07.619070', 'step': 3399, 'epoch': 2} {'type': 'loss', 'content': 0.005166996270418167, 'timestamp': '2025-09-10 02:23:07.647255', 'step': 3400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:07.689699', 'step': 3400, 'epoch': 2} {'type': 'loss', 'content': 0.0037923615891486406, 'timestamp': '2025-09-10 02:23:07.694920', 'step': 3401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:07.725938', 'step': 3401, 'epoch': 2} {'type': 'loss', 'content': 0.003060044953599572, 'timestamp': '2025-09-10 02:23:07.737891', 'step': 3402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:23:07.778578', 'step': 3402, 'epoch': 2} {'type': 'loss', 'content': 0.001733014010824263, 'timestamp': '2025-09-10 02:23:07.794729', 'step': 3403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:07.825397', 'step': 3403, 'epoch': 2} {'type': 'loss', 'content': 0.0021855314262211323, 'timestamp': '2025-09-10 02:23:07.853881', 'step': 3404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:07.887128', 'step': 3404, 'epoch': 2} {'type': 'loss', 'content': 0.03904338553547859, 'timestamp': '2025-09-10 02:23:07.891696', 'step': 3405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:07.922344', 'step': 3405, 'epoch': 2} {'type': 'loss', 'content': 0.007601436227560043, 'timestamp': '2025-09-10 02:23:07.934606', 'step': 3406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:07.966596', 'step': 3406, 'epoch': 2} {'type': 'loss', 'content': 0.009114629589021206, 'timestamp': '2025-09-10 02:23:07.973986', 'step': 3407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:08.005183', 'step': 3407, 'epoch': 2} {'type': 'loss', 'content': 0.021965792402625084, 'timestamp': '2025-09-10 02:23:08.036150', 'step': 3408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:08.068219', 'step': 3408, 'epoch': 2} {'type': 'loss', 'content': 0.007908275350928307, 'timestamp': '2025-09-10 02:23:08.081050', 'step': 3409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:08.112268', 'step': 3409, 'epoch': 2} {'type': 'loss', 'content': 0.009908582083880901, 'timestamp': '2025-09-10 02:23:08.116513', 'step': 3410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:08.150016', 'step': 3410, 'epoch': 2} {'type': 'loss', 'content': 0.006652395240962505, 'timestamp': '2025-09-10 02:23:08.163360', 'step': 3411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:08.195014', 'step': 3411, 'epoch': 2} {'type': 'loss', 'content': 0.005871969740837812, 'timestamp': '2025-09-10 02:23:08.225585', 'step': 3412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:08.257697', 'step': 3412, 'epoch': 2} {'type': 'loss', 'content': 0.007932118140161037, 'timestamp': '2025-09-10 02:23:08.262776', 'step': 3413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:08.294989', 'step': 3413, 'epoch': 2} {'type': 'loss', 'content': 0.011967500671744347, 'timestamp': '2025-09-10 02:23:08.302832', 'step': 3414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:08.334066', 'step': 3414, 'epoch': 2} {'type': 'loss', 'content': 0.0017909369198605418, 'timestamp': '2025-09-10 02:23:08.341717', 'step': 3415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:08.372844', 'step': 3415, 'epoch': 2} {'type': 'loss', 'content': 0.007196805439889431, 'timestamp': '2025-09-10 02:23:08.401415', 'step': 3416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:08.432415', 'step': 3416, 'epoch': 2} {'type': 'loss', 'content': 0.005088069010525942, 'timestamp': '2025-09-10 02:23:08.437571', 'step': 3417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:08.468347', 'step': 3417, 'epoch': 2} {'type': 'loss', 'content': 0.011828926391899586, 'timestamp': '2025-09-10 02:23:08.472861', 'step': 3418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:23:08.507358', 'step': 3418, 'epoch': 2} {'type': 'loss', 'content': 0.0015730817103758454, 'timestamp': '2025-09-10 02:23:08.520988', 'step': 3419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:08.553080', 'step': 3419, 'epoch': 2} {'type': 'loss', 'content': 0.010220969095826149, 'timestamp': '2025-09-10 02:23:08.583498', 'step': 3420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:23:08.623942', 'step': 3420, 'epoch': 2} {'type': 'loss', 'content': 0.0031598855275660753, 'timestamp': '2025-09-10 02:23:08.640953', 'step': 3421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:08.672646', 'step': 3421, 'epoch': 2} {'type': 'loss', 'content': 0.028410514816641808, 'timestamp': '2025-09-10 02:23:08.679740', 'step': 3422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:08.710963', 'step': 3422, 'epoch': 2} {'type': 'loss', 'content': 0.003635372733697295, 'timestamp': '2025-09-10 02:23:08.717809', 'step': 3423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:08.748767', 'step': 3423, 'epoch': 2} {'type': 'loss', 'content': 0.010761960409581661, 'timestamp': '2025-09-10 02:23:08.776510', 'step': 3424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:08.807771', 'step': 3424, 'epoch': 2} {'type': 'loss', 'content': 0.0029311534017324448, 'timestamp': '2025-09-10 02:23:08.815772', 'step': 3425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:08.846343', 'step': 3425, 'epoch': 2} {'type': 'loss', 'content': 0.005955227185040712, 'timestamp': '2025-09-10 02:23:08.853700', 'step': 3426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:08.887227', 'step': 3426, 'epoch': 2} {'type': 'loss', 'content': 0.0019082642393186688, 'timestamp': '2025-09-10 02:23:08.894193', 'step': 3427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:08.927241', 'step': 3427, 'epoch': 2} {'type': 'loss', 'content': 0.036622676998376846, 'timestamp': '2025-09-10 02:23:08.958448', 'step': 3428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:08.991248', 'step': 3428, 'epoch': 2} {'type': 'loss', 'content': 0.016960853710770607, 'timestamp': '2025-09-10 02:23:08.996726', 'step': 3429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:09.026889', 'step': 3429, 'epoch': 2} {'type': 'loss', 'content': 0.013002237305045128, 'timestamp': '2025-09-10 02:23:09.034596', 'step': 3430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:09.065070', 'step': 3430, 'epoch': 2} {'type': 'loss', 'content': 0.008839133195579052, 'timestamp': '2025-09-10 02:23:09.071772', 'step': 3431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:09.104799', 'step': 3431, 'epoch': 2} {'type': 'loss', 'content': 0.01928016170859337, 'timestamp': '2025-09-10 02:23:09.132528', 'step': 3432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:09.163379', 'step': 3432, 'epoch': 2} {'type': 'loss', 'content': 0.0017680247547104955, 'timestamp': '2025-09-10 02:23:09.165668', 'step': 3433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:23:09.195755', 'step': 3433, 'epoch': 2} {'type': 'loss', 'content': 0.0012528672814369202, 'timestamp': '2025-09-10 02:23:09.198349', 'step': 3434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:09.228746', 'step': 3434, 'epoch': 2} {'type': 'loss', 'content': 0.0041799359023571014, 'timestamp': '2025-09-10 02:23:09.235932', 'step': 3435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:09.266783', 'step': 3435, 'epoch': 2} {'type': 'loss', 'content': 0.00306068011559546, 'timestamp': '2025-09-10 02:23:09.295547', 'step': 3436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:09.326486', 'step': 3436, 'epoch': 2} {'type': 'loss', 'content': 0.0031013197731226683, 'timestamp': '2025-09-10 02:23:09.331936', 'step': 3437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:09.362497', 'step': 3437, 'epoch': 2} {'type': 'loss', 'content': 0.001428862102329731, 'timestamp': '2025-09-10 02:23:09.369800', 'step': 3438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:23:09.404365', 'step': 3438, 'epoch': 2} {'type': 'loss', 'content': 0.0005541969439946115, 'timestamp': '2025-09-10 02:23:09.418183', 'step': 3439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:09.449030', 'step': 3439, 'epoch': 2} {'type': 'loss', 'content': 0.004622712731361389, 'timestamp': '2025-09-10 02:23:09.478004', 'step': 3440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:09.509849', 'step': 3440, 'epoch': 2} {'type': 'loss', 'content': 0.0019380019512027502, 'timestamp': '2025-09-10 02:23:09.514406', 'step': 3441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:09.545872', 'step': 3441, 'epoch': 2} {'type': 'loss', 'content': 0.003715726314112544, 'timestamp': '2025-09-10 02:23:09.556902', 'step': 3442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:09.588546', 'step': 3442, 'epoch': 2} {'type': 'loss', 'content': 0.006264827214181423, 'timestamp': '2025-09-10 02:23:09.595030', 'step': 3443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:09.627432', 'step': 3443, 'epoch': 2} {'type': 'loss', 'content': 0.004857002291828394, 'timestamp': '2025-09-10 02:23:09.658245', 'step': 3444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:09.693492', 'step': 3444, 'epoch': 2} {'type': 'loss', 'content': 0.002044878900051117, 'timestamp': '2025-09-10 02:23:09.700332', 'step': 3445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:09.733066', 'step': 3445, 'epoch': 2} {'type': 'loss', 'content': 0.04166651517152786, 'timestamp': '2025-09-10 02:23:09.739766', 'step': 3446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:09.772160', 'step': 3446, 'epoch': 2} {'type': 'loss', 'content': 0.0031065084040164948, 'timestamp': '2025-09-10 02:23:09.782139', 'step': 3447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:09.816534', 'step': 3447, 'epoch': 2} {'type': 'loss', 'content': 0.0012844757875427604, 'timestamp': '2025-09-10 02:23:09.843914', 'step': 3448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:09.875547', 'step': 3448, 'epoch': 2} {'type': 'loss', 'content': 0.0019296734826639295, 'timestamp': '2025-09-10 02:23:09.883584', 'step': 3449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:09.916452', 'step': 3449, 'epoch': 2} {'type': 'loss', 'content': 0.0020907416474074125, 'timestamp': '2025-09-10 02:23:09.927687', 'step': 3450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:09.960487', 'step': 3450, 'epoch': 2} {'type': 'loss', 'content': 0.002724104793742299, 'timestamp': '2025-09-10 02:23:09.967106', 'step': 3451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:09.999304', 'step': 3451, 'epoch': 2} {'type': 'loss', 'content': 0.012764266692101955, 'timestamp': '2025-09-10 02:23:10.029740', 'step': 3452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:10.061699', 'step': 3452, 'epoch': 2} {'type': 'loss', 'content': 0.005641660653054714, 'timestamp': '2025-09-10 02:23:10.063917', 'step': 3453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:10.096530', 'step': 3453, 'epoch': 2} {'type': 'loss', 'content': 0.003335190238431096, 'timestamp': '2025-09-10 02:23:10.102996', 'step': 3454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:10.145804', 'step': 3454, 'epoch': 2} {'type': 'loss', 'content': 0.0014257727889344096, 'timestamp': '2025-09-10 02:23:10.157428', 'step': 3455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:23:10.196714', 'step': 3455, 'epoch': 2} {'type': 'loss', 'content': 0.005661274306476116, 'timestamp': '2025-09-10 02:23:10.233271', 'step': 3456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:10.269812', 'step': 3456, 'epoch': 2} {'type': 'loss', 'content': 0.0007868251414038241, 'timestamp': '2025-09-10 02:23:10.272408', 'step': 3457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:10.311148', 'step': 3457, 'epoch': 2} {'type': 'loss', 'content': 0.0019356502452865243, 'timestamp': '2025-09-10 02:23:10.318900', 'step': 3458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:10.349817', 'step': 3458, 'epoch': 2} {'type': 'loss', 'content': 0.006856503430753946, 'timestamp': '2025-09-10 02:23:10.357731', 'step': 3459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:10.394105', 'step': 3459, 'epoch': 2} {'type': 'loss', 'content': 0.0010916010942310095, 'timestamp': '2025-09-10 02:23:10.428313', 'step': 3460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:10.463463', 'step': 3460, 'epoch': 2} {'type': 'loss', 'content': 0.013903248123824596, 'timestamp': '2025-09-10 02:23:10.469270', 'step': 3461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:10.502983', 'step': 3461, 'epoch': 2} {'type': 'loss', 'content': 0.002239000052213669, 'timestamp': '2025-09-10 02:23:10.506401', 'step': 3462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:23:10.541230', 'step': 3462, 'epoch': 2} {'type': 'loss', 'content': 0.0007613528869114816, 'timestamp': '2025-09-10 02:23:10.543512', 'step': 3463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:23:10.581849', 'step': 3463, 'epoch': 2} {'type': 'loss', 'content': 0.0012689571594819427, 'timestamp': '2025-09-10 02:23:10.616370', 'step': 3464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:10.663739', 'step': 3464, 'epoch': 2} {'type': 'loss', 'content': 0.0037759561091661453, 'timestamp': '2025-09-10 02:23:10.671022', 'step': 3465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:10.714116', 'step': 3465, 'epoch': 2} {'type': 'loss', 'content': 0.0006281206151470542, 'timestamp': '2025-09-10 02:23:10.719764', 'step': 3466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:10.752091', 'step': 3466, 'epoch': 2} {'type': 'loss', 'content': 0.000767476565670222, 'timestamp': '2025-09-10 02:23:10.759522', 'step': 3467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:10.793859', 'step': 3467, 'epoch': 2} {'type': 'loss', 'content': 0.000946753949392587, 'timestamp': '2025-09-10 02:23:10.819850', 'step': 3468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:10.850932', 'step': 3468, 'epoch': 2} {'type': 'loss', 'content': 0.014056609943509102, 'timestamp': '2025-09-10 02:23:10.853224', 'step': 3469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:10.886985', 'step': 3469, 'epoch': 2} {'type': 'loss', 'content': 0.0004704651073552668, 'timestamp': '2025-09-10 02:23:10.894094', 'step': 3470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:10.926723', 'step': 3470, 'epoch': 2} {'type': 'loss', 'content': 0.0011611180379986763, 'timestamp': '2025-09-10 02:23:10.933919', 'step': 3471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:10.972466', 'step': 3471, 'epoch': 2} {'type': 'loss', 'content': 0.004651397932320833, 'timestamp': '2025-09-10 02:23:11.000688', 'step': 3472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:11.033209', 'step': 3472, 'epoch': 2} {'type': 'loss', 'content': 0.0010988789144903421, 'timestamp': '2025-09-10 02:23:11.038778', 'step': 3473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:11.069426', 'step': 3473, 'epoch': 2} {'type': 'loss', 'content': 0.001688135089352727, 'timestamp': '2025-09-10 02:23:11.073789', 'step': 3474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:11.105988', 'step': 3474, 'epoch': 2} {'type': 'loss', 'content': 0.008201858960092068, 'timestamp': '2025-09-10 02:23:11.110507', 'step': 3475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:11.142400', 'step': 3475, 'epoch': 2} {'type': 'loss', 'content': 0.0017634114483371377, 'timestamp': '2025-09-10 02:23:11.167876', 'step': 3476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:11.198653', 'step': 3476, 'epoch': 2} {'type': 'loss', 'content': 0.002664331579580903, 'timestamp': '2025-09-10 02:23:11.204161', 'step': 3477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:11.235062', 'step': 3477, 'epoch': 2} {'type': 'loss', 'content': 0.0006722270627506077, 'timestamp': '2025-09-10 02:23:11.242056', 'step': 3478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:11.272417', 'step': 3478, 'epoch': 2} {'type': 'loss', 'content': 0.0010332902893424034, 'timestamp': '2025-09-10 02:23:11.276778', 'step': 3479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:11.308084', 'step': 3479, 'epoch': 2} {'type': 'loss', 'content': 0.0014329560799524188, 'timestamp': '2025-09-10 02:23:11.337032', 'step': 3480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:11.367640', 'step': 3480, 'epoch': 2} {'type': 'loss', 'content': 0.009547821246087551, 'timestamp': '2025-09-10 02:23:11.373296', 'step': 3481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:11.406835', 'step': 3481, 'epoch': 2} {'type': 'loss', 'content': 0.015632616356015205, 'timestamp': '2025-09-10 02:23:11.420229', 'step': 3482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:11.451294', 'step': 3482, 'epoch': 2} {'type': 'loss', 'content': 0.0008822629461064935, 'timestamp': '2025-09-10 02:23:11.458362', 'step': 3483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:11.507670', 'step': 3483, 'epoch': 2} {'type': 'loss', 'content': 0.024470632895827293, 'timestamp': '2025-09-10 02:23:11.532646', 'step': 3484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:11.563413', 'step': 3484, 'epoch': 2} {'type': 'loss', 'content': 0.002709366148337722, 'timestamp': '2025-09-10 02:23:11.568540', 'step': 3485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:11.602144', 'step': 3485, 'epoch': 2} {'type': 'loss', 'content': 0.0006141713238321245, 'timestamp': '2025-09-10 02:23:11.615536', 'step': 3486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:11.646966', 'step': 3486, 'epoch': 2} {'type': 'loss', 'content': 0.00025127717526629567, 'timestamp': '2025-09-10 02:23:11.654566', 'step': 3487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:11.689017', 'step': 3487, 'epoch': 2} {'type': 'loss', 'content': 0.0025279794353991747, 'timestamp': '2025-09-10 02:23:11.720345', 'step': 3488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:11.751908', 'step': 3488, 'epoch': 2} {'type': 'loss', 'content': 0.0007817599689587951, 'timestamp': '2025-09-10 02:23:11.754063', 'step': 3489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:11.785830', 'step': 3489, 'epoch': 2} {'type': 'loss', 'content': 0.0025948244147002697, 'timestamp': '2025-09-10 02:23:11.792944', 'step': 3490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:11.823703', 'step': 3490, 'epoch': 2} {'type': 'loss', 'content': 0.0002477052912581712, 'timestamp': '2025-09-10 02:23:11.830840', 'step': 3491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:11.861527', 'step': 3491, 'epoch': 2} {'type': 'loss', 'content': 0.020622704178094864, 'timestamp': '2025-09-10 02:23:11.889421', 'step': 3492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:11.920080', 'step': 3492, 'epoch': 2} {'type': 'loss', 'content': 0.0009450044599361718, 'timestamp': '2025-09-10 02:23:11.925297', 'step': 3493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:23:11.963272', 'step': 3493, 'epoch': 2} {'type': 'loss', 'content': 0.004593479912728071, 'timestamp': '2025-09-10 02:23:11.978930', 'step': 3494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:12.011166', 'step': 3494, 'epoch': 2} {'type': 'loss', 'content': 0.0003686284471768886, 'timestamp': '2025-09-10 02:23:12.015801', 'step': 3495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:23:12.051195', 'step': 3495, 'epoch': 2} {'type': 'loss', 'content': 0.0008759652846492827, 'timestamp': '2025-09-10 02:23:12.085794', 'step': 3496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:23:12.116178', 'step': 3496, 'epoch': 2} {'type': 'loss', 'content': 0.002225195523351431, 'timestamp': '2025-09-10 02:23:12.118332', 'step': 3497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:12.148949', 'step': 3497, 'epoch': 2} {'type': 'loss', 'content': 0.0005732810823246837, 'timestamp': '2025-09-10 02:23:12.156800', 'step': 3498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:12.188215', 'step': 3498, 'epoch': 2} {'type': 'loss', 'content': 0.0018855527741834521, 'timestamp': '2025-09-10 02:23:12.199185', 'step': 3499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:12.230391', 'step': 3499, 'epoch': 2} {'type': 'loss', 'content': 0.00036894562072120607, 'timestamp': '2025-09-10 02:23:12.258639', 'step': 3500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 3500', 'timestamp': '2025-09-10 02:23:16.939336', 'step': 3500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:16.972263', 'step': 3500, 'epoch': 2} {'type': 'loss', 'content': 0.0015905782347545028, 'timestamp': '2025-09-10 02:23:16.976744', 'step': 3501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:17.011135', 'step': 3501, 'epoch': 2} {'type': 'loss', 'content': 0.0011544699082151055, 'timestamp': '2025-09-10 02:23:17.017976', 'step': 3502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:23:17.049103', 'step': 3502, 'epoch': 2} {'type': 'loss', 'content': 0.00034944407525472343, 'timestamp': '2025-09-10 02:23:17.051563', 'step': 3503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:17.081918', 'step': 3503, 'epoch': 2} {'type': 'loss', 'content': 0.00036820146488025784, 'timestamp': '2025-09-10 02:23:17.109738', 'step': 3504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:17.142389', 'step': 3504, 'epoch': 2} {'type': 'loss', 'content': 0.00649291044101119, 'timestamp': '2025-09-10 02:23:17.144832', 'step': 3505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:17.176811', 'step': 3505, 'epoch': 2} {'type': 'loss', 'content': 0.009862485341727734, 'timestamp': '2025-09-10 02:23:17.184533', 'step': 3506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:17.215687', 'step': 3506, 'epoch': 2} {'type': 'loss', 'content': 0.0013766074553132057, 'timestamp': '2025-09-10 02:23:17.223525', 'step': 3507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:17.254771', 'step': 3507, 'epoch': 2} {'type': 'loss', 'content': 0.004568330943584442, 'timestamp': '2025-09-10 02:23:17.283323', 'step': 3508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:17.318817', 'step': 3508, 'epoch': 2} {'type': 'loss', 'content': 0.012078741565346718, 'timestamp': '2025-09-10 02:23:17.328682', 'step': 3509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:17.359784', 'step': 3509, 'epoch': 2} {'type': 'loss', 'content': 0.0004991721361875534, 'timestamp': '2025-09-10 02:23:17.367115', 'step': 3510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:17.397399', 'step': 3510, 'epoch': 2} {'type': 'loss', 'content': 0.0006359159597195685, 'timestamp': '2025-09-10 02:23:17.404120', 'step': 3511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:17.435258', 'step': 3511, 'epoch': 2} {'type': 'loss', 'content': 0.0014153111260384321, 'timestamp': '2025-09-10 02:23:17.466397', 'step': 3512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:17.498228', 'step': 3512, 'epoch': 2} {'type': 'loss', 'content': 0.0020929204765707254, 'timestamp': '2025-09-10 02:23:17.503563', 'step': 3513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:17.533849', 'step': 3513, 'epoch': 2} {'type': 'loss', 'content': 0.000537493615411222, 'timestamp': '2025-09-10 02:23:17.537962', 'step': 3514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:17.568274', 'step': 3514, 'epoch': 2} {'type': 'loss', 'content': 0.0007732919184491038, 'timestamp': '2025-09-10 02:23:17.575550', 'step': 3515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:17.610571', 'step': 3515, 'epoch': 2} {'type': 'loss', 'content': 0.0020853166934102774, 'timestamp': '2025-09-10 02:23:17.638308', 'step': 3516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:17.670249', 'step': 3516, 'epoch': 2} {'type': 'loss', 'content': 0.008857056498527527, 'timestamp': '2025-09-10 02:23:17.674492', 'step': 3517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:17.706110', 'step': 3517, 'epoch': 2} {'type': 'loss', 'content': 0.001030710176564753, 'timestamp': '2025-09-10 02:23:17.716250', 'step': 3518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:17.751247', 'step': 3518, 'epoch': 2} {'type': 'loss', 'content': 0.0005307839601300657, 'timestamp': '2025-09-10 02:23:17.759184', 'step': 3519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:17.790289', 'step': 3519, 'epoch': 2} {'type': 'loss', 'content': 0.003780403407290578, 'timestamp': '2025-09-10 02:23:17.819065', 'step': 3520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:17.849819', 'step': 3520, 'epoch': 2} {'type': 'loss', 'content': 0.0005517909303307533, 'timestamp': '2025-09-10 02:23:17.852213', 'step': 3521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:17.883587', 'step': 3521, 'epoch': 2} {'type': 'loss', 'content': 0.0004757777787744999, 'timestamp': '2025-09-10 02:23:17.891136', 'step': 3522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:17.922466', 'step': 3522, 'epoch': 2} {'type': 'loss', 'content': 0.000609197246376425, 'timestamp': '2025-09-10 02:23:17.927193', 'step': 3523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:17.958742', 'step': 3523, 'epoch': 2} {'type': 'loss', 'content': 0.0007339877774938941, 'timestamp': '2025-09-10 02:23:17.987436', 'step': 3524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:18.020103', 'step': 3524, 'epoch': 2} {'type': 'loss', 'content': 0.0018651520367711782, 'timestamp': '2025-09-10 02:23:18.028154', 'step': 3525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:23:18.066961', 'step': 3525, 'epoch': 2} {'type': 'loss', 'content': 0.003974412567913532, 'timestamp': '2025-09-10 02:23:18.082846', 'step': 3526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:18.114410', 'step': 3526, 'epoch': 2} {'type': 'loss', 'content': 0.0008718844619579613, 'timestamp': '2025-09-10 02:23:18.118537', 'step': 3527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:18.150124', 'step': 3527, 'epoch': 2} {'type': 'loss', 'content': 0.00030707629048265517, 'timestamp': '2025-09-10 02:23:18.178486', 'step': 3528, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:23:28.262537', 'step': 3528, 'epoch': 2} {'type': 'pplx', 'content': 22053610.470987573, 'timestamp': '2025-09-10 02:23:28.265279', 'step': 3528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:28.295598', 'step': 3528, 'epoch': 2} {'type': 'loss', 'content': 0.00019998988136649132, 'timestamp': '2025-09-10 02:23:28.303559', 'step': 3529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:28.335417', 'step': 3529, 'epoch': 2} {'type': 'loss', 'content': 0.013725848868489265, 'timestamp': '2025-09-10 02:23:28.345349', 'step': 3530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:28.376647', 'step': 3530, 'epoch': 2} {'type': 'loss', 'content': 0.001963739050552249, 'timestamp': '2025-09-10 02:23:28.381047', 'step': 3531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:23:28.419680', 'step': 3531, 'epoch': 2} {'type': 'loss', 'content': 0.01121476013213396, 'timestamp': '2025-09-10 02:23:28.456740', 'step': 3532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:28.487527', 'step': 3532, 'epoch': 2} {'type': 'loss', 'content': 0.00019301848078612238, 'timestamp': '2025-09-10 02:23:28.496081', 'step': 3533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:28.527158', 'step': 3533, 'epoch': 2} {'type': 'loss', 'content': 0.0023806169629096985, 'timestamp': '2025-09-10 02:23:28.539780', 'step': 3534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:28.570913', 'step': 3534, 'epoch': 2} {'type': 'loss', 'content': 0.0010761814191937447, 'timestamp': '2025-09-10 02:23:28.575368', 'step': 3535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:28.606462', 'step': 3535, 'epoch': 2} {'type': 'loss', 'content': 0.00018318326328881085, 'timestamp': '2025-09-10 02:23:28.635024', 'step': 3536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:23:28.665441', 'step': 3536, 'epoch': 2} {'type': 'loss', 'content': 0.0066347974352538586, 'timestamp': '2025-09-10 02:23:28.667441', 'step': 3537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:28.698524', 'step': 3537, 'epoch': 2} {'type': 'loss', 'content': 0.00013012031558901072, 'timestamp': '2025-09-10 02:23:28.703094', 'step': 3538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 15187581968384}, 'timestamp': '2025-09-10 02:23:28.747230', 'step': 3538, 'epoch': 2} {'type': 'loss', 'content': 0.00027974756085313857, 'timestamp': '2025-09-10 02:23:28.764985', 'step': 3539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:28.795795', 'step': 3539, 'epoch': 2} {'type': 'loss', 'content': 0.003288812702521682, 'timestamp': '2025-09-10 02:23:28.827880', 'step': 3540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:28.860596', 'step': 3540, 'epoch': 2} {'type': 'loss', 'content': 0.0002967107866425067, 'timestamp': '2025-09-10 02:23:28.862803', 'step': 3541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:28.893979', 'step': 3541, 'epoch': 2} {'type': 'loss', 'content': 0.0013638153905048966, 'timestamp': '2025-09-10 02:23:28.900819', 'step': 3542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:28.931708', 'step': 3542, 'epoch': 2} {'type': 'loss', 'content': 0.0009955601999536157, 'timestamp': '2025-09-10 02:23:28.938584', 'step': 3543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:28.970094', 'step': 3543, 'epoch': 2} {'type': 'loss', 'content': 0.005556150339543819, 'timestamp': '2025-09-10 02:23:29.002133', 'step': 3544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:23:29.032247', 'step': 3544, 'epoch': 2} {'type': 'loss', 'content': 0.0003385456802789122, 'timestamp': '2025-09-10 02:23:29.034306', 'step': 3545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:29.064533', 'step': 3545, 'epoch': 2} {'type': 'loss', 'content': 0.0076200878247618675, 'timestamp': '2025-09-10 02:23:29.071305', 'step': 3546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:29.101681', 'step': 3546, 'epoch': 2} {'type': 'loss', 'content': 0.006286826450377703, 'timestamp': '2025-09-10 02:23:29.108661', 'step': 3547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:29.139842', 'step': 3547, 'epoch': 2} {'type': 'loss', 'content': 0.0003426824405323714, 'timestamp': '2025-09-10 02:23:29.173020', 'step': 3548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:29.204891', 'step': 3548, 'epoch': 2} {'type': 'loss', 'content': 0.034861672669649124, 'timestamp': '2025-09-10 02:23:29.209502', 'step': 3549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:29.251602', 'step': 3549, 'epoch': 2} {'type': 'loss', 'content': 0.0012050783261656761, 'timestamp': '2025-09-10 02:23:29.265034', 'step': 3550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:29.301796', 'step': 3550, 'epoch': 2} {'type': 'loss', 'content': 0.0464249923825264, 'timestamp': '2025-09-10 02:23:29.309590', 'step': 3551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 624], 'flops': 18509808050496}, 'timestamp': '2025-09-10 02:23:29.366721', 'step': 3551, 'epoch': 2} {'type': 'loss', 'content': 0.006405732128769159, 'timestamp': '2025-09-10 02:23:29.409390', 'step': 3552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:29.450769', 'step': 3552, 'epoch': 2} {'type': 'loss', 'content': 0.00038313533877953887, 'timestamp': '2025-09-10 02:23:29.458913', 'step': 3553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:29.495471', 'step': 3553, 'epoch': 2} {'type': 'loss', 'content': 0.0009717740467749536, 'timestamp': '2025-09-10 02:23:29.503209', 'step': 3554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:23:29.546360', 'step': 3554, 'epoch': 2} {'type': 'loss', 'content': 0.0007425061194226146, 'timestamp': '2025-09-10 02:23:29.561963', 'step': 3555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:29.602489', 'step': 3555, 'epoch': 2} {'type': 'loss', 'content': 0.0007218411774374545, 'timestamp': '2025-09-10 02:23:29.630453', 'step': 3556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:29.673053', 'step': 3556, 'epoch': 2} {'type': 'loss', 'content': 0.0036273004952818155, 'timestamp': '2025-09-10 02:23:29.677334', 'step': 3557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:29.718240', 'step': 3557, 'epoch': 2} {'type': 'loss', 'content': 0.0006142216734588146, 'timestamp': '2025-09-10 02:23:29.731639', 'step': 3558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:23:29.781095', 'step': 3558, 'epoch': 2} {'type': 'loss', 'content': 0.0007438276661559939, 'timestamp': '2025-09-10 02:23:29.798206', 'step': 3559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:23:29.840536', 'step': 3559, 'epoch': 2} {'type': 'loss', 'content': 0.00014973794168327004, 'timestamp': '2025-09-10 02:23:29.875435', 'step': 3560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:29.908439', 'step': 3560, 'epoch': 2} {'type': 'loss', 'content': 0.0003213935415260494, 'timestamp': '2025-09-10 02:23:29.913349', 'step': 3561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:29.947496', 'step': 3561, 'epoch': 2} {'type': 'loss', 'content': 0.0009177852771244943, 'timestamp': '2025-09-10 02:23:29.959499', 'step': 3562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:29.991532', 'step': 3562, 'epoch': 2} {'type': 'loss', 'content': 0.0032881794031709433, 'timestamp': '2025-09-10 02:23:30.002537', 'step': 3563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:30.033810', 'step': 3563, 'epoch': 2} {'type': 'loss', 'content': 0.004884254653006792, 'timestamp': '2025-09-10 02:23:30.065793', 'step': 3564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:30.096997', 'step': 3564, 'epoch': 2} {'type': 'loss', 'content': 0.005217746831476688, 'timestamp': '2025-09-10 02:23:30.106937', 'step': 3565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:30.137626', 'step': 3565, 'epoch': 2} {'type': 'loss', 'content': 0.002377876313403249, 'timestamp': '2025-09-10 02:23:30.141679', 'step': 3566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:30.173799', 'step': 3566, 'epoch': 2} {'type': 'loss', 'content': 0.0286801066249609, 'timestamp': '2025-09-10 02:23:30.181406', 'step': 3567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:30.218965', 'step': 3567, 'epoch': 2} {'type': 'loss', 'content': 0.025519220158457756, 'timestamp': '2025-09-10 02:23:30.250158', 'step': 3568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:30.280235', 'step': 3568, 'epoch': 2} {'type': 'loss', 'content': 0.0003562222118489444, 'timestamp': '2025-09-10 02:23:30.284802', 'step': 3569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:30.316472', 'step': 3569, 'epoch': 2} {'type': 'loss', 'content': 0.013461283408105373, 'timestamp': '2025-09-10 02:23:30.323863', 'step': 3570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:30.355122', 'step': 3570, 'epoch': 2} {'type': 'loss', 'content': 0.00494401203468442, 'timestamp': '2025-09-10 02:23:30.362015', 'step': 3571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:30.394352', 'step': 3571, 'epoch': 2} {'type': 'loss', 'content': 0.000439615425420925, 'timestamp': '2025-09-10 02:23:30.425502', 'step': 3572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:30.456922', 'step': 3572, 'epoch': 2} {'type': 'loss', 'content': 0.0008407292771153152, 'timestamp': '2025-09-10 02:23:30.464692', 'step': 3573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:30.495409', 'step': 3573, 'epoch': 2} {'type': 'loss', 'content': 0.0009364182478748262, 'timestamp': '2025-09-10 02:23:30.499546', 'step': 3574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:30.531237', 'step': 3574, 'epoch': 2} {'type': 'loss', 'content': 0.026412170380353928, 'timestamp': '2025-09-10 02:23:30.538705', 'step': 3575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:30.569767', 'step': 3575, 'epoch': 2} {'type': 'loss', 'content': 0.0028750034980475903, 'timestamp': '2025-09-10 02:23:30.598405', 'step': 3576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:30.629371', 'step': 3576, 'epoch': 2} {'type': 'loss', 'content': 0.0007422365597449243, 'timestamp': '2025-09-10 02:23:30.634494', 'step': 3577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:23:30.664788', 'step': 3577, 'epoch': 2} {'type': 'loss', 'content': 0.00027479632990434766, 'timestamp': '2025-09-10 02:23:30.667506', 'step': 3578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:30.697388', 'step': 3578, 'epoch': 2} {'type': 'loss', 'content': 0.0005509444163180888, 'timestamp': '2025-09-10 02:23:30.701644', 'step': 3579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:30.732510', 'step': 3579, 'epoch': 2} {'type': 'loss', 'content': 0.0011080257827416062, 'timestamp': '2025-09-10 02:23:30.761032', 'step': 3580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:30.792233', 'step': 3580, 'epoch': 2} {'type': 'loss', 'content': 0.001185481552965939, 'timestamp': '2025-09-10 02:23:30.802266', 'step': 3581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:30.832648', 'step': 3581, 'epoch': 2} {'type': 'loss', 'content': 0.0008745525847189128, 'timestamp': '2025-09-10 02:23:30.837004', 'step': 3582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:30.870080', 'step': 3582, 'epoch': 2} {'type': 'loss', 'content': 0.021427616477012634, 'timestamp': '2025-09-10 02:23:30.881842', 'step': 3583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:30.913345', 'step': 3583, 'epoch': 2} {'type': 'loss', 'content': 0.00033548500505276024, 'timestamp': '2025-09-10 02:23:30.938697', 'step': 3584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:30.976287', 'step': 3584, 'epoch': 2} {'type': 'loss', 'content': 0.001324447919614613, 'timestamp': '2025-09-10 02:23:30.979084', 'step': 3585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:31.014022', 'step': 3585, 'epoch': 2} {'type': 'loss', 'content': 0.027664339169859886, 'timestamp': '2025-09-10 02:23:31.022956', 'step': 3586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:31.054239', 'step': 3586, 'epoch': 2} {'type': 'loss', 'content': 0.0006238113855943084, 'timestamp': '2025-09-10 02:23:31.061602', 'step': 3587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:31.092272', 'step': 3587, 'epoch': 2} {'type': 'loss', 'content': 0.0007867095409892499, 'timestamp': '2025-09-10 02:23:31.120230', 'step': 3588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:31.151875', 'step': 3588, 'epoch': 2} {'type': 'loss', 'content': 0.029622314497828484, 'timestamp': '2025-09-10 02:23:31.161735', 'step': 3589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:31.193252', 'step': 3589, 'epoch': 2} {'type': 'loss', 'content': 0.00037610027357004583, 'timestamp': '2025-09-10 02:23:31.200375', 'step': 3590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:31.230682', 'step': 3590, 'epoch': 2} {'type': 'loss', 'content': 0.002425598446279764, 'timestamp': '2025-09-10 02:23:31.234856', 'step': 3591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:31.266565', 'step': 3591, 'epoch': 2} {'type': 'loss', 'content': 0.019675688818097115, 'timestamp': '2025-09-10 02:23:31.294566', 'step': 3592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:31.326699', 'step': 3592, 'epoch': 2} {'type': 'loss', 'content': 0.0001952952443389222, 'timestamp': '2025-09-10 02:23:31.331488', 'step': 3593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:31.362721', 'step': 3593, 'epoch': 2} {'type': 'loss', 'content': 0.0218330230563879, 'timestamp': '2025-09-10 02:23:31.372828', 'step': 3594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:31.406850', 'step': 3594, 'epoch': 2} {'type': 'loss', 'content': 0.0041700261645019054, 'timestamp': '2025-09-10 02:23:31.419032', 'step': 3595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:31.451196', 'step': 3595, 'epoch': 2} {'type': 'loss', 'content': 0.004490002058446407, 'timestamp': '2025-09-10 02:23:31.483020', 'step': 3596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:31.514251', 'step': 3596, 'epoch': 2} {'type': 'loss', 'content': 0.002926964545622468, 'timestamp': '2025-09-10 02:23:31.518753', 'step': 3597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:31.550875', 'step': 3597, 'epoch': 2} {'type': 'loss', 'content': 0.00023523984418716282, 'timestamp': '2025-09-10 02:23:31.558371', 'step': 3598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:31.590223', 'step': 3598, 'epoch': 2} {'type': 'loss', 'content': 0.004813474602997303, 'timestamp': '2025-09-10 02:23:31.597312', 'step': 3599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:31.627946', 'step': 3599, 'epoch': 2} {'type': 'loss', 'content': 0.00039707665564492345, 'timestamp': '2025-09-10 02:23:31.655957', 'step': 3600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:31.688467', 'step': 3600, 'epoch': 2} {'type': 'loss', 'content': 0.000567035167478025, 'timestamp': '2025-09-10 02:23:31.693782', 'step': 3601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:31.724428', 'step': 3601, 'epoch': 2} {'type': 'loss', 'content': 0.015649326145648956, 'timestamp': '2025-09-10 02:23:31.731574', 'step': 3602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:31.767124', 'step': 3602, 'epoch': 2} {'type': 'loss', 'content': 0.01300052274018526, 'timestamp': '2025-09-10 02:23:31.773836', 'step': 3603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:31.805427', 'step': 3603, 'epoch': 2} {'type': 'loss', 'content': 0.022090671584010124, 'timestamp': '2025-09-10 02:23:31.833160', 'step': 3604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:31.865752', 'step': 3604, 'epoch': 2} {'type': 'loss', 'content': 0.002304330002516508, 'timestamp': '2025-09-10 02:23:31.870759', 'step': 3605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:31.901258', 'step': 3605, 'epoch': 2} {'type': 'loss', 'content': 0.03670268505811691, 'timestamp': '2025-09-10 02:23:31.905369', 'step': 3606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:23:31.943717', 'step': 3606, 'epoch': 2} {'type': 'loss', 'content': 0.0005911525222472847, 'timestamp': '2025-09-10 02:23:31.959384', 'step': 3607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:31.992348', 'step': 3607, 'epoch': 2} {'type': 'loss', 'content': 0.0006495547713711858, 'timestamp': '2025-09-10 02:23:32.023492', 'step': 3608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:32.054908', 'step': 3608, 'epoch': 2} {'type': 'loss', 'content': 0.0005558125558309257, 'timestamp': '2025-09-10 02:23:32.059342', 'step': 3609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:32.090916', 'step': 3609, 'epoch': 2} {'type': 'loss', 'content': 0.0061562503688037395, 'timestamp': '2025-09-10 02:23:32.102900', 'step': 3610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:32.134868', 'step': 3610, 'epoch': 2} {'type': 'loss', 'content': 0.008789503946900368, 'timestamp': '2025-09-10 02:23:32.144774', 'step': 3611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:32.177089', 'step': 3611, 'epoch': 2} {'type': 'loss', 'content': 0.000540457374881953, 'timestamp': '2025-09-10 02:23:32.204872', 'step': 3612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:32.236140', 'step': 3612, 'epoch': 2} {'type': 'loss', 'content': 0.0006599615444429219, 'timestamp': '2025-09-10 02:23:32.240526', 'step': 3613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:32.272703', 'step': 3613, 'epoch': 2} {'type': 'loss', 'content': 0.002096888143569231, 'timestamp': '2025-09-10 02:23:32.280357', 'step': 3614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:32.311532', 'step': 3614, 'epoch': 2} {'type': 'loss', 'content': 0.0028935037553310394, 'timestamp': '2025-09-10 02:23:32.315934', 'step': 3615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:32.347081', 'step': 3615, 'epoch': 2} {'type': 'loss', 'content': 0.0018138455925509334, 'timestamp': '2025-09-10 02:23:32.372354', 'step': 3616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:32.404503', 'step': 3616, 'epoch': 2} {'type': 'loss', 'content': 0.006520233117043972, 'timestamp': '2025-09-10 02:23:32.409850', 'step': 3617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:32.445950', 'step': 3617, 'epoch': 2} {'type': 'loss', 'content': 0.0026026610285043716, 'timestamp': '2025-09-10 02:23:32.459332', 'step': 3618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:23:32.499491', 'step': 3618, 'epoch': 2} {'type': 'loss', 'content': 0.011162899434566498, 'timestamp': '2025-09-10 02:23:32.515406', 'step': 3619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:32.549195', 'step': 3619, 'epoch': 2} {'type': 'loss', 'content': 0.0006067942013032734, 'timestamp': '2025-09-10 02:23:32.580873', 'step': 3620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:32.612421', 'step': 3620, 'epoch': 2} {'type': 'loss', 'content': 0.0009515011915937066, 'timestamp': '2025-09-10 02:23:32.617085', 'step': 3621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:32.650398', 'step': 3621, 'epoch': 2} {'type': 'loss', 'content': 0.0030977034475654364, 'timestamp': '2025-09-10 02:23:32.657133', 'step': 3622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:32.690597', 'step': 3622, 'epoch': 2} {'type': 'loss', 'content': 0.0023766574449837208, 'timestamp': '2025-09-10 02:23:32.701178', 'step': 3623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:32.732882', 'step': 3623, 'epoch': 2} {'type': 'loss', 'content': 0.050299059599637985, 'timestamp': '2025-09-10 02:23:32.766351', 'step': 3624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:32.798823', 'step': 3624, 'epoch': 2} {'type': 'loss', 'content': 0.0003462900349404663, 'timestamp': '2025-09-10 02:23:32.806489', 'step': 3625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:32.841734', 'step': 3625, 'epoch': 2} {'type': 'loss', 'content': 0.03242403641343117, 'timestamp': '2025-09-10 02:23:32.854324', 'step': 3626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:32.886966', 'step': 3626, 'epoch': 2} {'type': 'loss', 'content': 0.0009655548492446542, 'timestamp': '2025-09-10 02:23:32.893816', 'step': 3627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:32.925570', 'step': 3627, 'epoch': 2} {'type': 'loss', 'content': 0.011085288599133492, 'timestamp': '2025-09-10 02:23:32.957370', 'step': 3628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 15187581968384}, 'timestamp': '2025-09-10 02:23:33.001453', 'step': 3628, 'epoch': 2} {'type': 'loss', 'content': 0.0011424163822084665, 'timestamp': '2025-09-10 02:23:33.018706', 'step': 3629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:23:33.059054', 'step': 3629, 'epoch': 2} {'type': 'loss', 'content': 0.027174891903996468, 'timestamp': '2025-09-10 02:23:33.074676', 'step': 3630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:33.112865', 'step': 3630, 'epoch': 2} {'type': 'loss', 'content': 0.009898961521685123, 'timestamp': '2025-09-10 02:23:33.116705', 'step': 3631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:33.151599', 'step': 3631, 'epoch': 2} {'type': 'loss', 'content': 0.0001422611385351047, 'timestamp': '2025-09-10 02:23:33.178550', 'step': 3632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:33.216383', 'step': 3632, 'epoch': 2} {'type': 'loss', 'content': 0.006423108279705048, 'timestamp': '2025-09-10 02:23:33.218613', 'step': 3633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:33.250747', 'step': 3633, 'epoch': 2} {'type': 'loss', 'content': 0.0006503048934973776, 'timestamp': '2025-09-10 02:23:33.254438', 'step': 3634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:33.287142', 'step': 3634, 'epoch': 2} {'type': 'loss', 'content': 0.02117828093469143, 'timestamp': '2025-09-10 02:23:33.297447', 'step': 3635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:33.329371', 'step': 3635, 'epoch': 2} {'type': 'loss', 'content': 0.0025022621266543865, 'timestamp': '2025-09-10 02:23:33.356913', 'step': 3636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:33.388951', 'step': 3636, 'epoch': 2} {'type': 'loss', 'content': 0.004223259165883064, 'timestamp': '2025-09-10 02:23:33.391527', 'step': 3637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:23:33.426342', 'step': 3637, 'epoch': 2} {'type': 'loss', 'content': 0.03953773155808449, 'timestamp': '2025-09-10 02:23:33.429345', 'step': 3638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:33.460347', 'step': 3638, 'epoch': 2} {'type': 'loss', 'content': 0.025826627388596535, 'timestamp': '2025-09-10 02:23:33.470332', 'step': 3639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:23:33.505344', 'step': 3639, 'epoch': 2} {'type': 'loss', 'content': 0.0006002942100167274, 'timestamp': '2025-09-10 02:23:33.540029', 'step': 3640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:33.571550', 'step': 3640, 'epoch': 2} {'type': 'loss', 'content': 0.010516015812754631, 'timestamp': '2025-09-10 02:23:33.576171', 'step': 3641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:33.606991', 'step': 3641, 'epoch': 2} {'type': 'loss', 'content': 0.0020751620177179575, 'timestamp': '2025-09-10 02:23:33.613847', 'step': 3642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:33.645478', 'step': 3642, 'epoch': 2} {'type': 'loss', 'content': 0.0017879597144201398, 'timestamp': '2025-09-10 02:23:33.657529', 'step': 3643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:33.689679', 'step': 3643, 'epoch': 2} {'type': 'loss', 'content': 0.0004607823502738029, 'timestamp': '2025-09-10 02:23:33.722339', 'step': 3644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:33.753387', 'step': 3644, 'epoch': 2} {'type': 'loss', 'content': 0.012203511781990528, 'timestamp': '2025-09-10 02:23:33.763904', 'step': 3645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:33.797518', 'step': 3645, 'epoch': 2} {'type': 'loss', 'content': 0.0037501014303416014, 'timestamp': '2025-09-10 02:23:33.810878', 'step': 3646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:33.843012', 'step': 3646, 'epoch': 2} {'type': 'loss', 'content': 0.006922434084117413, 'timestamp': '2025-09-10 02:23:33.849942', 'step': 3647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:33.880986', 'step': 3647, 'epoch': 2} {'type': 'loss', 'content': 0.0018579624593257904, 'timestamp': '2025-09-10 02:23:33.908723', 'step': 3648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:33.939821', 'step': 3648, 'epoch': 2} {'type': 'loss', 'content': 0.0014378555351868272, 'timestamp': '2025-09-10 02:23:33.943463', 'step': 3649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:33.975165', 'step': 3649, 'epoch': 2} {'type': 'loss', 'content': 0.004263308364897966, 'timestamp': '2025-09-10 02:23:33.985314', 'step': 3650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:34.015655', 'step': 3650, 'epoch': 2} {'type': 'loss', 'content': 0.006144766230136156, 'timestamp': '2025-09-10 02:23:34.026553', 'step': 3651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:23:34.057068', 'step': 3651, 'epoch': 2} {'type': 'loss', 'content': 0.0022812257520854473, 'timestamp': '2025-09-10 02:23:34.080881', 'step': 3652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:34.111599', 'step': 3652, 'epoch': 2} {'type': 'loss', 'content': 0.013019556179642677, 'timestamp': '2025-09-10 02:23:34.117102', 'step': 3653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:23:34.151180', 'step': 3653, 'epoch': 2} {'type': 'loss', 'content': 0.008377458900213242, 'timestamp': '2025-09-10 02:23:34.164994', 'step': 3654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:34.195131', 'step': 3654, 'epoch': 2} {'type': 'loss', 'content': 0.003772433614358306, 'timestamp': '2025-09-10 02:23:34.206283', 'step': 3655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:34.236552', 'step': 3655, 'epoch': 2} {'type': 'loss', 'content': 0.017940301448106766, 'timestamp': '2025-09-10 02:23:34.262014', 'step': 3656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:34.294442', 'step': 3656, 'epoch': 2} {'type': 'loss', 'content': 0.0058494978584349155, 'timestamp': '2025-09-10 02:23:34.298294', 'step': 3657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:34.330418', 'step': 3657, 'epoch': 2} {'type': 'loss', 'content': 0.02427353337407112, 'timestamp': '2025-09-10 02:23:34.342087', 'step': 3658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:23:34.383500', 'step': 3658, 'epoch': 2} {'type': 'loss', 'content': 0.0017965204315260053, 'timestamp': '2025-09-10 02:23:34.400621', 'step': 3659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:34.431989', 'step': 3659, 'epoch': 2} {'type': 'loss', 'content': 0.0022874141577631235, 'timestamp': '2025-09-10 02:23:34.459435', 'step': 3660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:34.490403', 'step': 3660, 'epoch': 2} {'type': 'loss', 'content': 0.001652199076488614, 'timestamp': '2025-09-10 02:23:34.495126', 'step': 3661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:23:34.534987', 'step': 3661, 'epoch': 2} {'type': 'loss', 'content': 0.002010711934417486, 'timestamp': '2025-09-10 02:23:34.550930', 'step': 3662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:23:34.582182', 'step': 3662, 'epoch': 2} {'type': 'loss', 'content': 0.009694479405879974, 'timestamp': '2025-09-10 02:23:34.584510', 'step': 3663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:34.616529', 'step': 3663, 'epoch': 2} {'type': 'loss', 'content': 0.007335420232266188, 'timestamp': '2025-09-10 02:23:34.647418', 'step': 3664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:34.679314', 'step': 3664, 'epoch': 2} {'type': 'loss', 'content': 0.01509284321218729, 'timestamp': '2025-09-10 02:23:34.691955', 'step': 3665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:34.722912', 'step': 3665, 'epoch': 2} {'type': 'loss', 'content': 0.03952633589506149, 'timestamp': '2025-09-10 02:23:34.730783', 'step': 3666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:34.760989', 'step': 3666, 'epoch': 2} {'type': 'loss', 'content': 0.006663356442004442, 'timestamp': '2025-09-10 02:23:34.765594', 'step': 3667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:34.796418', 'step': 3667, 'epoch': 2} {'type': 'loss', 'content': 0.018776053562760353, 'timestamp': '2025-09-10 02:23:34.824940', 'step': 3668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:34.855613', 'step': 3668, 'epoch': 2} {'type': 'loss', 'content': 0.00801047496497631, 'timestamp': '2025-09-10 02:23:34.863532', 'step': 3669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:34.894177', 'step': 3669, 'epoch': 2} {'type': 'loss', 'content': 0.03632910177111626, 'timestamp': '2025-09-10 02:23:34.905046', 'step': 3670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:34.936404', 'step': 3670, 'epoch': 2} {'type': 'loss', 'content': 0.014353514648973942, 'timestamp': '2025-09-10 02:23:34.948727', 'step': 3671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:34.980357', 'step': 3671, 'epoch': 2} {'type': 'loss', 'content': 0.00914282537996769, 'timestamp': '2025-09-10 02:23:35.007911', 'step': 3672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:35.039344', 'step': 3672, 'epoch': 2} {'type': 'loss', 'content': 0.0020550028420984745, 'timestamp': '2025-09-10 02:23:35.047649', 'step': 3673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:23:35.082542', 'step': 3673, 'epoch': 2} {'type': 'loss', 'content': 0.0013698196271434426, 'timestamp': '2025-09-10 02:23:35.096409', 'step': 3674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:23:35.135446', 'step': 3674, 'epoch': 2} {'type': 'loss', 'content': 0.0010654388461261988, 'timestamp': '2025-09-10 02:23:35.151603', 'step': 3675, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:23:45.394972', 'step': 3675, 'epoch': 2} {'type': 'pplx', 'content': 21047841.64222782, 'timestamp': '2025-09-10 02:23:45.398778', 'step': 3675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:45.430060', 'step': 3675, 'epoch': 2} {'type': 'loss', 'content': 0.0022460322361439466, 'timestamp': '2025-09-10 02:23:45.456947', 'step': 3676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:45.488804', 'step': 3676, 'epoch': 2} {'type': 'loss', 'content': 0.00032661884324625134, 'timestamp': '2025-09-10 02:23:45.498535', 'step': 3677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:45.531376', 'step': 3677, 'epoch': 2} {'type': 'loss', 'content': 0.0034290028270334005, 'timestamp': '2025-09-10 02:23:45.538293', 'step': 3678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:45.569174', 'step': 3678, 'epoch': 2} {'type': 'loss', 'content': 0.001095048151910305, 'timestamp': '2025-09-10 02:23:45.576755', 'step': 3679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:45.607998', 'step': 3679, 'epoch': 2} {'type': 'loss', 'content': 0.005293484777212143, 'timestamp': '2025-09-10 02:23:45.641456', 'step': 3680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:45.673169', 'step': 3680, 'epoch': 2} {'type': 'loss', 'content': 0.005093970336019993, 'timestamp': '2025-09-10 02:23:45.685830', 'step': 3681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:45.716501', 'step': 3681, 'epoch': 2} {'type': 'loss', 'content': 0.0007117848144844174, 'timestamp': '2025-09-10 02:23:45.728651', 'step': 3682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:45.759255', 'step': 3682, 'epoch': 2} {'type': 'loss', 'content': 0.00283992663025856, 'timestamp': '2025-09-10 02:23:45.770063', 'step': 3683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:45.803452', 'step': 3683, 'epoch': 2} {'type': 'loss', 'content': 0.029055939987301826, 'timestamp': '2025-09-10 02:23:45.837780', 'step': 3684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:45.868244', 'step': 3684, 'epoch': 2} {'type': 'loss', 'content': 0.000742242147680372, 'timestamp': '2025-09-10 02:23:45.873656', 'step': 3685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:45.903744', 'step': 3685, 'epoch': 2} {'type': 'loss', 'content': 0.007362429518252611, 'timestamp': '2025-09-10 02:23:45.910834', 'step': 3686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:45.941967', 'step': 3686, 'epoch': 2} {'type': 'loss', 'content': 0.004590542521327734, 'timestamp': '2025-09-10 02:23:45.954565', 'step': 3687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:45.987952', 'step': 3687, 'epoch': 2} {'type': 'loss', 'content': 0.010698004625737667, 'timestamp': '2025-09-10 02:23:46.022265', 'step': 3688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:46.053140', 'step': 3688, 'epoch': 2} {'type': 'loss', 'content': 0.0017821193905547261, 'timestamp': '2025-09-10 02:23:46.055294', 'step': 3689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:46.085878', 'step': 3689, 'epoch': 2} {'type': 'loss', 'content': 0.010044259950518608, 'timestamp': '2025-09-10 02:23:46.097980', 'step': 3690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:46.129333', 'step': 3690, 'epoch': 2} {'type': 'loss', 'content': 0.0010982693638652563, 'timestamp': '2025-09-10 02:23:46.140338', 'step': 3691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:23:46.175154', 'step': 3691, 'epoch': 2} {'type': 'loss', 'content': 0.005969376303255558, 'timestamp': '2025-09-10 02:23:46.209750', 'step': 3692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:46.241338', 'step': 3692, 'epoch': 2} {'type': 'loss', 'content': 0.0015076607232913375, 'timestamp': '2025-09-10 02:23:46.253943', 'step': 3693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:46.285232', 'step': 3693, 'epoch': 2} {'type': 'loss', 'content': 0.0018409850308671594, 'timestamp': '2025-09-10 02:23:46.289800', 'step': 3694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:46.319885', 'step': 3694, 'epoch': 2} {'type': 'loss', 'content': 0.0012192835565656424, 'timestamp': '2025-09-10 02:23:46.326508', 'step': 3695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:46.359303', 'step': 3695, 'epoch': 2} {'type': 'loss', 'content': 0.002559660468250513, 'timestamp': '2025-09-10 02:23:46.386965', 'step': 3696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:46.426410', 'step': 3696, 'epoch': 2} {'type': 'loss', 'content': 0.0033039411064237356, 'timestamp': '2025-09-10 02:23:46.436026', 'step': 3697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:46.472303', 'step': 3697, 'epoch': 2} {'type': 'loss', 'content': 0.00010749106149887666, 'timestamp': '2025-09-10 02:23:46.482552', 'step': 3698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:46.513260', 'step': 3698, 'epoch': 2} {'type': 'loss', 'content': 0.025171758607029915, 'timestamp': '2025-09-10 02:23:46.523468', 'step': 3699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:46.553613', 'step': 3699, 'epoch': 2} {'type': 'loss', 'content': 0.0031803150195628405, 'timestamp': '2025-09-10 02:23:46.581832', 'step': 3700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:46.612784', 'step': 3700, 'epoch': 2} {'type': 'loss', 'content': 0.00467759370803833, 'timestamp': '2025-09-10 02:23:46.622624', 'step': 3701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:46.652511', 'step': 3701, 'epoch': 2} {'type': 'loss', 'content': 0.0028708036988973618, 'timestamp': '2025-09-10 02:23:46.657095', 'step': 3702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:46.688080', 'step': 3702, 'epoch': 2} {'type': 'loss', 'content': 0.0009561380720697343, 'timestamp': '2025-09-10 02:23:46.699122', 'step': 3703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:46.730072', 'step': 3703, 'epoch': 2} {'type': 'loss', 'content': 0.001121461158618331, 'timestamp': '2025-09-10 02:23:46.757739', 'step': 3704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:46.788304', 'step': 3704, 'epoch': 2} {'type': 'loss', 'content': 0.005151792895048857, 'timestamp': '2025-09-10 02:23:46.793886', 'step': 3705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:46.825904', 'step': 3705, 'epoch': 2} {'type': 'loss', 'content': 0.00032375051523558795, 'timestamp': '2025-09-10 02:23:46.835936', 'step': 3706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:46.867444', 'step': 3706, 'epoch': 2} {'type': 'loss', 'content': 0.005311047192662954, 'timestamp': '2025-09-10 02:23:46.874324', 'step': 3707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:46.904803', 'step': 3707, 'epoch': 2} {'type': 'loss', 'content': 0.018711045384407043, 'timestamp': '2025-09-10 02:23:46.938269', 'step': 3708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:23:46.970344', 'step': 3708, 'epoch': 2} {'type': 'loss', 'content': 0.00399330398067832, 'timestamp': '2025-09-10 02:23:46.983435', 'step': 3709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:47.015316', 'step': 3709, 'epoch': 2} {'type': 'loss', 'content': 0.0006306317518465221, 'timestamp': '2025-09-10 02:23:47.027878', 'step': 3710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:47.061711', 'step': 3710, 'epoch': 2} {'type': 'loss', 'content': 0.00018934406398329884, 'timestamp': '2025-09-10 02:23:47.071989', 'step': 3711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:23:47.104274', 'step': 3711, 'epoch': 2} {'type': 'loss', 'content': 0.013638163916766644, 'timestamp': '2025-09-10 02:23:47.128327', 'step': 3712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:47.162414', 'step': 3712, 'epoch': 2} {'type': 'loss', 'content': 0.031886231154203415, 'timestamp': '2025-09-10 02:23:47.166905', 'step': 3713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:23:47.204158', 'step': 3713, 'epoch': 2} {'type': 'loss', 'content': 0.0013043539365753531, 'timestamp': '2025-09-10 02:23:47.217908', 'step': 3714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:47.253639', 'step': 3714, 'epoch': 2} {'type': 'loss', 'content': 5.598477218882181e-05, 'timestamp': '2025-09-10 02:23:47.257454', 'step': 3715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:47.294393', 'step': 3715, 'epoch': 2} {'type': 'loss', 'content': 0.00044489253195934, 'timestamp': '2025-09-10 02:23:47.327746', 'step': 3716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:47.363529', 'step': 3716, 'epoch': 2} {'type': 'loss', 'content': 0.0010741885052993894, 'timestamp': '2025-09-10 02:23:47.368139', 'step': 3717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:47.418380', 'step': 3717, 'epoch': 2} {'type': 'loss', 'content': 0.003896048292517662, 'timestamp': '2025-09-10 02:23:47.428484', 'step': 3718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:47.470970', 'step': 3718, 'epoch': 2} {'type': 'loss', 'content': 0.002632845425978303, 'timestamp': '2025-09-10 02:23:47.483553', 'step': 3719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:23:47.529925', 'step': 3719, 'epoch': 2} {'type': 'loss', 'content': 0.023609664291143417, 'timestamp': '2025-09-10 02:23:47.564504', 'step': 3720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:47.605360', 'step': 3720, 'epoch': 2} {'type': 'loss', 'content': 0.018469881266355515, 'timestamp': '2025-09-10 02:23:47.613216', 'step': 3721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:23:47.657401', 'step': 3721, 'epoch': 2} {'type': 'loss', 'content': 0.0002696176525205374, 'timestamp': '2025-09-10 02:23:47.671202', 'step': 3722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:47.703080', 'step': 3722, 'epoch': 2} {'type': 'loss', 'content': 0.01182649191468954, 'timestamp': '2025-09-10 02:23:47.710965', 'step': 3723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:47.741115', 'step': 3723, 'epoch': 2} {'type': 'loss', 'content': 0.0016434434801340103, 'timestamp': '2025-09-10 02:23:47.769176', 'step': 3724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:47.801840', 'step': 3724, 'epoch': 2} {'type': 'loss', 'content': 0.009728246368467808, 'timestamp': '2025-09-10 02:23:47.809393', 'step': 3725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:47.842116', 'step': 3725, 'epoch': 2} {'type': 'loss', 'content': 0.0011867971625179052, 'timestamp': '2025-09-10 02:23:47.849032', 'step': 3726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:47.885747', 'step': 3726, 'epoch': 2} {'type': 'loss', 'content': 0.010371259413659573, 'timestamp': '2025-09-10 02:23:47.899151', 'step': 3727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:47.932148', 'step': 3727, 'epoch': 2} {'type': 'loss', 'content': 0.00210155313834548, 'timestamp': '2025-09-10 02:23:47.962867', 'step': 3728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:23:47.995654', 'step': 3728, 'epoch': 2} {'type': 'loss', 'content': 0.0019191886531189084, 'timestamp': '2025-09-10 02:23:48.008286', 'step': 3729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:48.039506', 'step': 3729, 'epoch': 2} {'type': 'loss', 'content': 0.00013749170466326177, 'timestamp': '2025-09-10 02:23:48.044059', 'step': 3730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:48.077114', 'step': 3730, 'epoch': 2} {'type': 'loss', 'content': 0.002893412485718727, 'timestamp': '2025-09-10 02:23:48.084814', 'step': 3731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:48.117583', 'step': 3731, 'epoch': 2} {'type': 'loss', 'content': 0.0007469491683878005, 'timestamp': '2025-09-10 02:23:48.146222', 'step': 3732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:48.178024', 'step': 3732, 'epoch': 2} {'type': 'loss', 'content': 0.0003932146355509758, 'timestamp': '2025-09-10 02:23:48.186459', 'step': 3733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:23:48.218119', 'step': 3733, 'epoch': 2} {'type': 'loss', 'content': 8.557453838875517e-05, 'timestamp': '2025-09-10 02:23:48.221036', 'step': 3734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:48.253673', 'step': 3734, 'epoch': 2} {'type': 'loss', 'content': 0.0002127924090018496, 'timestamp': '2025-09-10 02:23:48.263724', 'step': 3735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:23:48.295365', 'step': 3735, 'epoch': 2} {'type': 'loss', 'content': 0.00040841306326910853, 'timestamp': '2025-09-10 02:23:48.320578', 'step': 3736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 624], 'flops': 18509808050496}, 'timestamp': '2025-09-10 02:23:48.369927', 'step': 3736, 'epoch': 2} {'type': 'loss', 'content': 0.0010468022665008903, 'timestamp': '2025-09-10 02:23:48.391699', 'step': 3737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:48.421601', 'step': 3737, 'epoch': 2} {'type': 'loss', 'content': 0.0001885231613414362, 'timestamp': '2025-09-10 02:23:48.428506', 'step': 3738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:48.458297', 'step': 3738, 'epoch': 2} {'type': 'loss', 'content': 3.569291584426537e-05, 'timestamp': '2025-09-10 02:23:48.462466', 'step': 3739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:23:48.505255', 'step': 3739, 'epoch': 2} {'type': 'loss', 'content': 0.003039777046069503, 'timestamp': '2025-09-10 02:23:48.543505', 'step': 3740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:48.574502', 'step': 3740, 'epoch': 2} {'type': 'loss', 'content': 0.006935932207852602, 'timestamp': '2025-09-10 02:23:48.584997', 'step': 3741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:48.619172', 'step': 3741, 'epoch': 2} {'type': 'loss', 'content': 0.0013960804790258408, 'timestamp': '2025-09-10 02:23:48.626071', 'step': 3742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:48.660000', 'step': 3742, 'epoch': 2} {'type': 'loss', 'content': 0.0006007336778566241, 'timestamp': '2025-09-10 02:23:48.664017', 'step': 3743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:48.695629', 'step': 3743, 'epoch': 2} {'type': 'loss', 'content': 0.013558273203670979, 'timestamp': '2025-09-10 02:23:48.727061', 'step': 3744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:48.758708', 'step': 3744, 'epoch': 2} {'type': 'loss', 'content': 0.0007245481247082353, 'timestamp': '2025-09-10 02:23:48.761187', 'step': 3745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:48.791751', 'step': 3745, 'epoch': 2} {'type': 'loss', 'content': 0.05813758820295334, 'timestamp': '2025-09-10 02:23:48.798627', 'step': 3746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:48.839439', 'step': 3746, 'epoch': 2} {'type': 'loss', 'content': 0.0002415095950709656, 'timestamp': '2025-09-10 02:23:48.845944', 'step': 3747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:48.877586', 'step': 3747, 'epoch': 2} {'type': 'loss', 'content': 0.000962139165494591, 'timestamp': '2025-09-10 02:23:48.905331', 'step': 3748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:23:48.936723', 'step': 3748, 'epoch': 2} {'type': 'loss', 'content': 0.0019527226686477661, 'timestamp': '2025-09-10 02:23:48.938587', 'step': 3749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:48.968844', 'step': 3749, 'epoch': 2} {'type': 'loss', 'content': 0.0019534730818122625, 'timestamp': '2025-09-10 02:23:48.975883', 'step': 3750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:49.007630', 'step': 3750, 'epoch': 2} {'type': 'loss', 'content': 0.0006131255067884922, 'timestamp': '2025-09-10 02:23:49.020206', 'step': 3751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:23:49.058993', 'step': 3751, 'epoch': 2} {'type': 'loss', 'content': 0.0003716005012392998, 'timestamp': '2025-09-10 02:23:49.095806', 'step': 3752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:49.126990', 'step': 3752, 'epoch': 2} {'type': 'loss', 'content': 0.0004358034930191934, 'timestamp': '2025-09-10 02:23:49.134452', 'step': 3753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:49.165481', 'step': 3753, 'epoch': 2} {'type': 'loss', 'content': 0.00044732578680850565, 'timestamp': '2025-09-10 02:23:49.169363', 'step': 3754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:49.201372', 'step': 3754, 'epoch': 2} {'type': 'loss', 'content': 0.004059514496475458, 'timestamp': '2025-09-10 02:23:49.208648', 'step': 3755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:49.240352', 'step': 3755, 'epoch': 2} {'type': 'loss', 'content': 0.000174950881046243, 'timestamp': '2025-09-10 02:23:49.265504', 'step': 3756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:23:49.304855', 'step': 3756, 'epoch': 2} {'type': 'loss', 'content': 0.003618494840338826, 'timestamp': '2025-09-10 02:23:49.307253', 'step': 3757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:49.338656', 'step': 3757, 'epoch': 2} {'type': 'loss', 'content': 0.000870470714289695, 'timestamp': '2025-09-10 02:23:49.345475', 'step': 3758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:49.380400', 'step': 3758, 'epoch': 2} {'type': 'loss', 'content': 0.0002803669194690883, 'timestamp': '2025-09-10 02:23:49.388113', 'step': 3759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:49.419299', 'step': 3759, 'epoch': 2} {'type': 'loss', 'content': 0.02521214261651039, 'timestamp': '2025-09-10 02:23:49.449879', 'step': 3760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:49.480870', 'step': 3760, 'epoch': 2} {'type': 'loss', 'content': 0.03576240316033363, 'timestamp': '2025-09-10 02:23:49.483063', 'step': 3761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 18035204324480}, 'timestamp': '2025-09-10 02:23:49.534629', 'step': 3761, 'epoch': 2} {'type': 'loss', 'content': 0.043033067137002945, 'timestamp': '2025-09-10 02:23:49.556117', 'step': 3762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:49.587429', 'step': 3762, 'epoch': 2} {'type': 'loss', 'content': 0.0002879269886761904, 'timestamp': '2025-09-10 02:23:49.591819', 'step': 3763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:49.622714', 'step': 3763, 'epoch': 2} {'type': 'loss', 'content': 0.011223888956010342, 'timestamp': '2025-09-10 02:23:49.647657', 'step': 3764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:49.678505', 'step': 3764, 'epoch': 2} {'type': 'loss', 'content': 0.0006490662926808, 'timestamp': '2025-09-10 02:23:49.680403', 'step': 3765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:49.711153', 'step': 3765, 'epoch': 2} {'type': 'loss', 'content': 0.002436300739645958, 'timestamp': '2025-09-10 02:23:49.718328', 'step': 3766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:49.749636', 'step': 3766, 'epoch': 2} {'type': 'loss', 'content': 0.03835447505116463, 'timestamp': '2025-09-10 02:23:49.753874', 'step': 3767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:49.785074', 'step': 3767, 'epoch': 2} {'type': 'loss', 'content': 0.001985372742637992, 'timestamp': '2025-09-10 02:23:49.813463', 'step': 3768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:49.845131', 'step': 3768, 'epoch': 2} {'type': 'loss', 'content': 0.005818284582346678, 'timestamp': '2025-09-10 02:23:49.847905', 'step': 3769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:49.878162', 'step': 3769, 'epoch': 2} {'type': 'loss', 'content': 0.0013168009463697672, 'timestamp': '2025-09-10 02:23:49.885062', 'step': 3770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:49.918288', 'step': 3770, 'epoch': 2} {'type': 'loss', 'content': 0.0015119427116587758, 'timestamp': '2025-09-10 02:23:49.925676', 'step': 3771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:49.959836', 'step': 3771, 'epoch': 2} {'type': 'loss', 'content': 0.0005293331341817975, 'timestamp': '2025-09-10 02:23:49.987984', 'step': 3772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:50.019957', 'step': 3772, 'epoch': 2} {'type': 'loss', 'content': 0.0002255245781270787, 'timestamp': '2025-09-10 02:23:50.029337', 'step': 3773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:50.060821', 'step': 3773, 'epoch': 2} {'type': 'loss', 'content': 0.0010724698659032583, 'timestamp': '2025-09-10 02:23:50.068255', 'step': 3774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:50.099297', 'step': 3774, 'epoch': 2} {'type': 'loss', 'content': 0.0030956987757235765, 'timestamp': '2025-09-10 02:23:50.105997', 'step': 3775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:50.138577', 'step': 3775, 'epoch': 2} {'type': 'loss', 'content': 0.003467730712145567, 'timestamp': '2025-09-10 02:23:50.170347', 'step': 3776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:50.201636', 'step': 3776, 'epoch': 2} {'type': 'loss', 'content': 0.0005490719340741634, 'timestamp': '2025-09-10 02:23:50.206811', 'step': 3777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:50.239883', 'step': 3777, 'epoch': 2} {'type': 'loss', 'content': 0.0002172060776501894, 'timestamp': '2025-09-10 02:23:50.247433', 'step': 3778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:50.278787', 'step': 3778, 'epoch': 2} {'type': 'loss', 'content': 0.002384532243013382, 'timestamp': '2025-09-10 02:23:50.286019', 'step': 3779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:50.320002', 'step': 3779, 'epoch': 2} {'type': 'loss', 'content': 0.02834930457174778, 'timestamp': '2025-09-10 02:23:50.347846', 'step': 3780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:50.378714', 'step': 3780, 'epoch': 2} {'type': 'loss', 'content': 0.006114445626735687, 'timestamp': '2025-09-10 02:23:50.381081', 'step': 3781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:50.411840', 'step': 3781, 'epoch': 2} {'type': 'loss', 'content': 0.0004408101085573435, 'timestamp': '2025-09-10 02:23:50.419203', 'step': 3782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:50.449842', 'step': 3782, 'epoch': 2} {'type': 'loss', 'content': 0.0024946555495262146, 'timestamp': '2025-09-10 02:23:50.457195', 'step': 3783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:23:50.489297', 'step': 3783, 'epoch': 2} {'type': 'loss', 'content': 0.01270032487809658, 'timestamp': '2025-09-10 02:23:50.521844', 'step': 3784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:50.553321', 'step': 3784, 'epoch': 2} {'type': 'loss', 'content': 0.0033720643259584904, 'timestamp': '2025-09-10 02:23:50.558695', 'step': 3785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:50.589742', 'step': 3785, 'epoch': 2} {'type': 'loss', 'content': 0.05379801243543625, 'timestamp': '2025-09-10 02:23:50.596659', 'step': 3786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:50.627235', 'step': 3786, 'epoch': 2} {'type': 'loss', 'content': 0.0017733937129378319, 'timestamp': '2025-09-10 02:23:50.634247', 'step': 3787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 19459015502528}, 'timestamp': '2025-09-10 02:23:50.689983', 'step': 3787, 'epoch': 2} {'type': 'loss', 'content': 0.0037569236010313034, 'timestamp': '2025-09-10 02:23:50.734334', 'step': 3788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:50.765113', 'step': 3788, 'epoch': 2} {'type': 'loss', 'content': 0.0019782905001193285, 'timestamp': '2025-09-10 02:23:50.769424', 'step': 3789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:50.800084', 'step': 3789, 'epoch': 2} {'type': 'loss', 'content': 0.013935952447354794, 'timestamp': '2025-09-10 02:23:50.807050', 'step': 3790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:50.847666', 'step': 3790, 'epoch': 2} {'type': 'loss', 'content': 0.01583055406808853, 'timestamp': '2025-09-10 02:23:50.852158', 'step': 3791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:50.882829', 'step': 3791, 'epoch': 2} {'type': 'loss', 'content': 0.0005747873219661415, 'timestamp': '2025-09-10 02:23:50.914311', 'step': 3792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:50.947726', 'step': 3792, 'epoch': 2} {'type': 'loss', 'content': 0.0003728387819137424, 'timestamp': '2025-09-10 02:23:50.955043', 'step': 3793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:50.985737', 'step': 3793, 'epoch': 2} {'type': 'loss', 'content': 0.0017032746691256762, 'timestamp': '2025-09-10 02:23:50.989812', 'step': 3794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:23:51.021167', 'step': 3794, 'epoch': 2} {'type': 'loss', 'content': 0.0027263087686151266, 'timestamp': '2025-09-10 02:23:51.023598', 'step': 3795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:51.053872', 'step': 3795, 'epoch': 2} {'type': 'loss', 'content': 0.0019095286261290312, 'timestamp': '2025-09-10 02:23:51.081640', 'step': 3796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:51.113023', 'step': 3796, 'epoch': 2} {'type': 'loss', 'content': 0.0055357408709824085, 'timestamp': '2025-09-10 02:23:51.120842', 'step': 3797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:51.152210', 'step': 3797, 'epoch': 2} {'type': 'loss', 'content': 0.00228920322842896, 'timestamp': '2025-09-10 02:23:51.162997', 'step': 3798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:51.193944', 'step': 3798, 'epoch': 2} {'type': 'loss', 'content': 0.0029537074733525515, 'timestamp': '2025-09-10 02:23:51.200712', 'step': 3799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:51.231792', 'step': 3799, 'epoch': 2} {'type': 'loss', 'content': 0.009542775340378284, 'timestamp': '2025-09-10 02:23:51.259863', 'step': 3800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:51.292129', 'step': 3800, 'epoch': 2} {'type': 'loss', 'content': 0.0010026551317423582, 'timestamp': '2025-09-10 02:23:51.297291', 'step': 3801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:51.328429', 'step': 3801, 'epoch': 2} {'type': 'loss', 'content': 0.0001947238779393956, 'timestamp': '2025-09-10 02:23:51.335239', 'step': 3802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:51.366633', 'step': 3802, 'epoch': 2} {'type': 'loss', 'content': 0.0014312856364995241, 'timestamp': '2025-09-10 02:23:51.373403', 'step': 3803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:51.404802', 'step': 3803, 'epoch': 2} {'type': 'loss', 'content': 0.0010877539170905948, 'timestamp': '2025-09-10 02:23:51.435519', 'step': 3804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:23:51.472136', 'step': 3804, 'epoch': 2} {'type': 'loss', 'content': 0.0061668092384934425, 'timestamp': '2025-09-10 02:23:51.487525', 'step': 3805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:51.519845', 'step': 3805, 'epoch': 2} {'type': 'loss', 'content': 0.0013660427648574114, 'timestamp': '2025-09-10 02:23:51.531967', 'step': 3806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:51.564750', 'step': 3806, 'epoch': 2} {'type': 'loss', 'content': 0.00044723015162162483, 'timestamp': '2025-09-10 02:23:51.574188', 'step': 3807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:23:51.606066', 'step': 3807, 'epoch': 2} {'type': 'loss', 'content': 0.0004913516459055245, 'timestamp': '2025-09-10 02:23:51.633627', 'step': 3808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:23:51.665052', 'step': 3808, 'epoch': 2} {'type': 'loss', 'content': 0.005763310939073563, 'timestamp': '2025-09-10 02:23:51.672360', 'step': 3809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:51.704493', 'step': 3809, 'epoch': 2} {'type': 'loss', 'content': 0.0034396513365209103, 'timestamp': '2025-09-10 02:23:51.711547', 'step': 3810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:23:51.742543', 'step': 3810, 'epoch': 2} {'type': 'loss', 'content': 0.012839804403483868, 'timestamp': '2025-09-10 02:23:51.754889', 'step': 3811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:51.786891', 'step': 3811, 'epoch': 2} {'type': 'loss', 'content': 0.0012081711320206523, 'timestamp': '2025-09-10 02:23:51.818243', 'step': 3812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:51.849541', 'step': 3812, 'epoch': 2} {'type': 'loss', 'content': 0.0012805964797735214, 'timestamp': '2025-09-10 02:23:51.854395', 'step': 3813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:51.892405', 'step': 3813, 'epoch': 2} {'type': 'loss', 'content': 0.0016190716996788979, 'timestamp': '2025-09-10 02:23:51.902950', 'step': 3814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:51.934567', 'step': 3814, 'epoch': 2} {'type': 'loss', 'content': 0.00160274060908705, 'timestamp': '2025-09-10 02:23:51.941937', 'step': 3815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:23:51.973443', 'step': 3815, 'epoch': 2} {'type': 'loss', 'content': 0.007644836790859699, 'timestamp': '2025-09-10 02:23:52.000824', 'step': 3816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:23:52.034481', 'step': 3816, 'epoch': 2} {'type': 'loss', 'content': 0.030178042128682137, 'timestamp': '2025-09-10 02:23:52.037320', 'step': 3817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:23:52.069303', 'step': 3817, 'epoch': 2} {'type': 'loss', 'content': 0.00040458128205500543, 'timestamp': '2025-09-10 02:23:52.076280', 'step': 3818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:23:52.107864', 'step': 3818, 'epoch': 2} {'type': 'loss', 'content': 0.021405117586255074, 'timestamp': '2025-09-10 02:23:52.115515', 'step': 3819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:23:52.146252', 'step': 3819, 'epoch': 2} {'type': 'loss', 'content': 0.013643233105540276, 'timestamp': '2025-09-10 02:23:52.177599', 'step': 3820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:52.209358', 'step': 3820, 'epoch': 2} {'type': 'loss', 'content': 0.004835444502532482, 'timestamp': '2025-09-10 02:23:52.211571', 'step': 3821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:23:52.242137', 'step': 3821, 'epoch': 2} {'type': 'loss', 'content': 0.022363480180501938, 'timestamp': '2025-09-10 02:23:52.246433', 'step': 3822, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:24:02.339761', 'step': 3822, 'epoch': 2} {'type': 'pplx', 'content': 19433033.667341556, 'timestamp': '2025-09-10 02:24:02.342208', 'step': 3822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:24:02.381828', 'step': 3822, 'epoch': 2} {'type': 'loss', 'content': 0.043930236250162125, 'timestamp': '2025-09-10 02:24:02.399123', 'step': 3823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:24:02.438505', 'step': 3823, 'epoch': 2} {'type': 'loss', 'content': 0.004557534120976925, 'timestamp': '2025-09-10 02:24:02.474955', 'step': 3824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:02.509196', 'step': 3824, 'epoch': 2} {'type': 'loss', 'content': 0.0007657821988686919, 'timestamp': '2025-09-10 02:24:02.513420', 'step': 3825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:02.546830', 'step': 3825, 'epoch': 2} {'type': 'loss', 'content': 0.007892725057899952, 'timestamp': '2025-09-10 02:24:02.558785', 'step': 3826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:24:02.599847', 'step': 3826, 'epoch': 2} {'type': 'loss', 'content': 0.0026977970264852047, 'timestamp': '2025-09-10 02:24:02.616867', 'step': 3827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:02.647816', 'step': 3827, 'epoch': 2} {'type': 'loss', 'content': 0.0012466126354411244, 'timestamp': '2025-09-10 02:24:02.675701', 'step': 3828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 13289167064320}, 'timestamp': '2025-09-10 02:24:02.712249', 'step': 3828, 'epoch': 2} {'type': 'loss', 'content': 0.0026414524763822556, 'timestamp': '2025-09-10 02:24:02.728133', 'step': 3829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:02.761149', 'step': 3829, 'epoch': 2} {'type': 'loss', 'content': 0.000412534165661782, 'timestamp': '2025-09-10 02:24:02.766841', 'step': 3830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:02.801406', 'step': 3830, 'epoch': 2} {'type': 'loss', 'content': 0.004566606599837542, 'timestamp': '2025-09-10 02:24:02.810504', 'step': 3831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:02.842542', 'step': 3831, 'epoch': 2} {'type': 'loss', 'content': 0.003999842330813408, 'timestamp': '2025-09-10 02:24:02.874692', 'step': 3832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:02.907490', 'step': 3832, 'epoch': 2} {'type': 'loss', 'content': 0.028634166345000267, 'timestamp': '2025-09-10 02:24:02.915127', 'step': 3833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:02.947307', 'step': 3833, 'epoch': 2} {'type': 'loss', 'content': 0.010636513121426105, 'timestamp': '2025-09-10 02:24:02.950689', 'step': 3834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:02.982711', 'step': 3834, 'epoch': 2} {'type': 'loss', 'content': 0.00048292818246409297, 'timestamp': '2025-09-10 02:24:02.993519', 'step': 3835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:03.025164', 'step': 3835, 'epoch': 2} {'type': 'loss', 'content': 0.0005672777188010514, 'timestamp': '2025-09-10 02:24:03.052537', 'step': 3836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:03.084203', 'step': 3836, 'epoch': 2} {'type': 'loss', 'content': 0.0003444700560066849, 'timestamp': '2025-09-10 02:24:03.093537', 'step': 3837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:03.125051', 'step': 3837, 'epoch': 2} {'type': 'loss', 'content': 0.0015334226191043854, 'timestamp': '2025-09-10 02:24:03.135462', 'step': 3838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:03.166961', 'step': 3838, 'epoch': 2} {'type': 'loss', 'content': 0.0006245824624784291, 'timestamp': '2025-09-10 02:24:03.177542', 'step': 3839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:03.208216', 'step': 3839, 'epoch': 2} {'type': 'loss', 'content': 0.0002433412882965058, 'timestamp': '2025-09-10 02:24:03.241567', 'step': 3840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:03.273090', 'step': 3840, 'epoch': 2} {'type': 'loss', 'content': 0.0006837646360509098, 'timestamp': '2025-09-10 02:24:03.277420', 'step': 3841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:03.308079', 'step': 3841, 'epoch': 2} {'type': 'loss', 'content': 0.0020594163797795773, 'timestamp': '2025-09-10 02:24:03.312065', 'step': 3842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:03.342985', 'step': 3842, 'epoch': 2} {'type': 'loss', 'content': 0.0023391323629766703, 'timestamp': '2025-09-10 02:24:03.347097', 'step': 3843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:03.377572', 'step': 3843, 'epoch': 2} {'type': 'loss', 'content': 0.00048196568968705833, 'timestamp': '2025-09-10 02:24:03.402366', 'step': 3844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:03.433467', 'step': 3844, 'epoch': 2} {'type': 'loss', 'content': 0.002800496993586421, 'timestamp': '2025-09-10 02:24:03.435938', 'step': 3845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:03.466018', 'step': 3845, 'epoch': 2} {'type': 'loss', 'content': 0.0011212360113859177, 'timestamp': '2025-09-10 02:24:03.469910', 'step': 3846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:24:03.503751', 'step': 3846, 'epoch': 2} {'type': 'loss', 'content': 0.0017739442409947515, 'timestamp': '2025-09-10 02:24:03.517821', 'step': 3847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:24:03.548066', 'step': 3847, 'epoch': 2} {'type': 'loss', 'content': 0.002876395359635353, 'timestamp': '2025-09-10 02:24:03.571371', 'step': 3848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:03.601521', 'step': 3848, 'epoch': 2} {'type': 'loss', 'content': 0.0032014145981520414, 'timestamp': '2025-09-10 02:24:03.605727', 'step': 3849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:03.636647', 'step': 3849, 'epoch': 2} {'type': 'loss', 'content': 0.001701483502984047, 'timestamp': '2025-09-10 02:24:03.646311', 'step': 3850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:03.677513', 'step': 3850, 'epoch': 2} {'type': 'loss', 'content': 0.01332316268235445, 'timestamp': '2025-09-10 02:24:03.684806', 'step': 3851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:03.715898', 'step': 3851, 'epoch': 2} {'type': 'loss', 'content': 0.010636200197041035, 'timestamp': '2025-09-10 02:24:03.740412', 'step': 3852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:03.772090', 'step': 3852, 'epoch': 2} {'type': 'loss', 'content': 0.0018638168694451451, 'timestamp': '2025-09-10 02:24:03.778728', 'step': 3853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:03.810931', 'step': 3853, 'epoch': 2} {'type': 'loss', 'content': 0.0011919804383069277, 'timestamp': '2025-09-10 02:24:03.820628', 'step': 3854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:03.854201', 'step': 3854, 'epoch': 2} {'type': 'loss', 'content': 0.002363163512200117, 'timestamp': '2025-09-10 02:24:03.860738', 'step': 3855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:03.892534', 'step': 3855, 'epoch': 2} {'type': 'loss', 'content': 0.0008571963990107179, 'timestamp': '2025-09-10 02:24:03.916324', 'step': 3856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:03.947688', 'step': 3856, 'epoch': 2} {'type': 'loss', 'content': 0.001244921120814979, 'timestamp': '2025-09-10 02:24:03.949591', 'step': 3857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:03.979801', 'step': 3857, 'epoch': 2} {'type': 'loss', 'content': 0.0029732866678386927, 'timestamp': '2025-09-10 02:24:03.986458', 'step': 3858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:04.018877', 'step': 3858, 'epoch': 2} {'type': 'loss', 'content': 0.002771953120827675, 'timestamp': '2025-09-10 02:24:04.028498', 'step': 3859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:04.060667', 'step': 3859, 'epoch': 2} {'type': 'loss', 'content': 0.005387973506003618, 'timestamp': '2025-09-10 02:24:04.088671', 'step': 3860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:24:04.127687', 'step': 3860, 'epoch': 2} {'type': 'loss', 'content': 0.001176676363684237, 'timestamp': '2025-09-10 02:24:04.144668', 'step': 3861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:04.177813', 'step': 3861, 'epoch': 2} {'type': 'loss', 'content': 0.0007978384965099394, 'timestamp': '2025-09-10 02:24:04.184388', 'step': 3862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:04.219122', 'step': 3862, 'epoch': 2} {'type': 'loss', 'content': 0.02228599414229393, 'timestamp': '2025-09-10 02:24:04.226335', 'step': 3863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:04.258575', 'step': 3863, 'epoch': 2} {'type': 'loss', 'content': 0.003167739836499095, 'timestamp': '2025-09-10 02:24:04.283761', 'step': 3864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:24:04.321725', 'step': 3864, 'epoch': 2} {'type': 'loss', 'content': 0.0012020551366731524, 'timestamp': '2025-09-10 02:24:04.337366', 'step': 3865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:04.372743', 'step': 3865, 'epoch': 2} {'type': 'loss', 'content': 0.0009655249887146056, 'timestamp': '2025-09-10 02:24:04.383198', 'step': 3866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:04.423189', 'step': 3866, 'epoch': 2} {'type': 'loss', 'content': 0.0005044231074862182, 'timestamp': '2025-09-10 02:24:04.430487', 'step': 3867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:04.468856', 'step': 3867, 'epoch': 2} {'type': 'loss', 'content': 0.0006265141419135034, 'timestamp': '2025-09-10 02:24:04.496358', 'step': 3868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:04.535109', 'step': 3868, 'epoch': 2} {'type': 'loss', 'content': 0.0006974139832891524, 'timestamp': '2025-09-10 02:24:04.539510', 'step': 3869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:04.576441', 'step': 3869, 'epoch': 2} {'type': 'loss', 'content': 0.0025091536808758974, 'timestamp': '2025-09-10 02:24:04.583740', 'step': 3870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:04.616020', 'step': 3870, 'epoch': 2} {'type': 'loss', 'content': 0.002585696056485176, 'timestamp': '2025-09-10 02:24:04.623367', 'step': 3871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:04.653411', 'step': 3871, 'epoch': 2} {'type': 'loss', 'content': 0.00017903503612615168, 'timestamp': '2025-09-10 02:24:04.676942', 'step': 3872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:04.709069', 'step': 3872, 'epoch': 2} {'type': 'loss', 'content': 0.0017580740386620164, 'timestamp': '2025-09-10 02:24:04.718216', 'step': 3873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:04.749475', 'step': 3873, 'epoch': 2} {'type': 'loss', 'content': 0.0017524746945127845, 'timestamp': '2025-09-10 02:24:04.755997', 'step': 3874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:04.787618', 'step': 3874, 'epoch': 2} {'type': 'loss', 'content': 0.0003729330201167613, 'timestamp': '2025-09-10 02:24:04.797160', 'step': 3875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:04.828740', 'step': 3875, 'epoch': 2} {'type': 'loss', 'content': 0.002224268391728401, 'timestamp': '2025-09-10 02:24:04.861938', 'step': 3876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:04.893491', 'step': 3876, 'epoch': 2} {'type': 'loss', 'content': 0.0009001967846415937, 'timestamp': '2025-09-10 02:24:04.897745', 'step': 3877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:04.930422', 'step': 3877, 'epoch': 2} {'type': 'loss', 'content': 0.0016282566357403994, 'timestamp': '2025-09-10 02:24:04.937753', 'step': 3878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:04.969288', 'step': 3878, 'epoch': 2} {'type': 'loss', 'content': 0.040085725486278534, 'timestamp': '2025-09-10 02:24:04.972968', 'step': 3879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:05.004246', 'step': 3879, 'epoch': 2} {'type': 'loss', 'content': 0.0017969904001802206, 'timestamp': '2025-09-10 02:24:05.032418', 'step': 3880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:05.064709', 'step': 3880, 'epoch': 2} {'type': 'loss', 'content': 0.0019167568534612656, 'timestamp': '2025-09-10 02:24:05.069317', 'step': 3881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:05.100668', 'step': 3881, 'epoch': 2} {'type': 'loss', 'content': 0.000999232055619359, 'timestamp': '2025-09-10 02:24:05.108173', 'step': 3882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:24:05.140099', 'step': 3882, 'epoch': 2} {'type': 'loss', 'content': 0.0007008261163718998, 'timestamp': '2025-09-10 02:24:05.142449', 'step': 3883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:24:05.180595', 'step': 3883, 'epoch': 2} {'type': 'loss', 'content': 0.0016444515204057097, 'timestamp': '2025-09-10 02:24:05.217354', 'step': 3884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:05.251315', 'step': 3884, 'epoch': 2} {'type': 'loss', 'content': 0.0006205525132827461, 'timestamp': '2025-09-10 02:24:05.259496', 'step': 3885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:05.290788', 'step': 3885, 'epoch': 2} {'type': 'loss', 'content': 0.0010018015746027231, 'timestamp': '2025-09-10 02:24:05.297304', 'step': 3886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:05.329551', 'step': 3886, 'epoch': 2} {'type': 'loss', 'content': 0.0008122866274788976, 'timestamp': '2025-09-10 02:24:05.336204', 'step': 3887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:05.367674', 'step': 3887, 'epoch': 2} {'type': 'loss', 'content': 0.004226117394864559, 'timestamp': '2025-09-10 02:24:05.398780', 'step': 3888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:05.431026', 'step': 3888, 'epoch': 2} {'type': 'loss', 'content': 0.002288134302943945, 'timestamp': '2025-09-10 02:24:05.435599', 'step': 3889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:24:05.477036', 'step': 3889, 'epoch': 2} {'type': 'loss', 'content': 0.027158772572875023, 'timestamp': '2025-09-10 02:24:05.494244', 'step': 3890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:05.525912', 'step': 3890, 'epoch': 2} {'type': 'loss', 'content': 0.001486063003540039, 'timestamp': '2025-09-10 02:24:05.535115', 'step': 3891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:05.566775', 'step': 3891, 'epoch': 2} {'type': 'loss', 'content': 0.0014154304517433047, 'timestamp': '2025-09-10 02:24:05.591814', 'step': 3892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:05.623348', 'step': 3892, 'epoch': 2} {'type': 'loss', 'content': 0.015531855635344982, 'timestamp': '2025-09-10 02:24:05.627610', 'step': 3893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:05.658895', 'step': 3893, 'epoch': 2} {'type': 'loss', 'content': 0.0005819514626637101, 'timestamp': '2025-09-10 02:24:05.661365', 'step': 3894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:05.692262', 'step': 3894, 'epoch': 2} {'type': 'loss', 'content': 0.0014906743308529258, 'timestamp': '2025-09-10 02:24:05.696070', 'step': 3895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:05.726485', 'step': 3895, 'epoch': 2} {'type': 'loss', 'content': 0.00021287697018124163, 'timestamp': '2025-09-10 02:24:05.757225', 'step': 3896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:05.787897', 'step': 3896, 'epoch': 2} {'type': 'loss', 'content': 0.001206160755828023, 'timestamp': '2025-09-10 02:24:05.793073', 'step': 3897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:05.824256', 'step': 3897, 'epoch': 2} {'type': 'loss', 'content': 0.0016391824465245008, 'timestamp': '2025-09-10 02:24:05.826387', 'step': 3898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:05.857451', 'step': 3898, 'epoch': 2} {'type': 'loss', 'content': 0.0073780762031674385, 'timestamp': '2025-09-10 02:24:05.868009', 'step': 3899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:05.902995', 'step': 3899, 'epoch': 2} {'type': 'loss', 'content': 0.0006778707611374557, 'timestamp': '2025-09-10 02:24:05.930468', 'step': 3900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:05.961954', 'step': 3900, 'epoch': 2} {'type': 'loss', 'content': 0.0003114322025794536, 'timestamp': '2025-09-10 02:24:05.969241', 'step': 3901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:24:06.002522', 'step': 3901, 'epoch': 2} {'type': 'loss', 'content': 0.0023112166672945023, 'timestamp': '2025-09-10 02:24:06.016152', 'step': 3902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:06.047931', 'step': 3902, 'epoch': 2} {'type': 'loss', 'content': 0.0018408901523798704, 'timestamp': '2025-09-10 02:24:06.054449', 'step': 3903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:06.085284', 'step': 3903, 'epoch': 2} {'type': 'loss', 'content': 0.0006988499662838876, 'timestamp': '2025-09-10 02:24:06.110276', 'step': 3904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:06.142214', 'step': 3904, 'epoch': 2} {'type': 'loss', 'content': 0.0008732205023989081, 'timestamp': '2025-09-10 02:24:06.149419', 'step': 3905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:06.180067', 'step': 3905, 'epoch': 2} {'type': 'loss', 'content': 0.0010088557610288262, 'timestamp': '2025-09-10 02:24:06.187424', 'step': 3906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:06.219238', 'step': 3906, 'epoch': 2} {'type': 'loss', 'content': 0.0009297534124925733, 'timestamp': '2025-09-10 02:24:06.223486', 'step': 3907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:06.254588', 'step': 3907, 'epoch': 2} {'type': 'loss', 'content': 0.002321895444765687, 'timestamp': '2025-09-10 02:24:06.286023', 'step': 3908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:06.317271', 'step': 3908, 'epoch': 2} {'type': 'loss', 'content': 0.007969407364726067, 'timestamp': '2025-09-10 02:24:06.319873', 'step': 3909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:06.350925', 'step': 3909, 'epoch': 2} {'type': 'loss', 'content': 0.0036575000267475843, 'timestamp': '2025-09-10 02:24:06.357603', 'step': 3910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:06.389054', 'step': 3910, 'epoch': 2} {'type': 'loss', 'content': 0.004755291156470776, 'timestamp': '2025-09-10 02:24:06.395946', 'step': 3911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:06.427525', 'step': 3911, 'epoch': 2} {'type': 'loss', 'content': 0.0007069699349813163, 'timestamp': '2025-09-10 02:24:06.452159', 'step': 3912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:06.483179', 'step': 3912, 'epoch': 2} {'type': 'loss', 'content': 0.02136976644396782, 'timestamp': '2025-09-10 02:24:06.493098', 'step': 3913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:06.523811', 'step': 3913, 'epoch': 2} {'type': 'loss', 'content': 0.01693398505449295, 'timestamp': '2025-09-10 02:24:06.531364', 'step': 3914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:06.563605', 'step': 3914, 'epoch': 2} {'type': 'loss', 'content': 0.002733456203714013, 'timestamp': '2025-09-10 02:24:06.570557', 'step': 3915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:06.601457', 'step': 3915, 'epoch': 2} {'type': 'loss', 'content': 0.010088739916682243, 'timestamp': '2025-09-10 02:24:06.629030', 'step': 3916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:06.659845', 'step': 3916, 'epoch': 2} {'type': 'loss', 'content': 0.0005804885877296329, 'timestamp': '2025-09-10 02:24:06.664255', 'step': 3917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:06.695717', 'step': 3917, 'epoch': 2} {'type': 'loss', 'content': 0.0004878589534200728, 'timestamp': '2025-09-10 02:24:06.707563', 'step': 3918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:06.738941', 'step': 3918, 'epoch': 2} {'type': 'loss', 'content': 0.0038721126038581133, 'timestamp': '2025-09-10 02:24:06.751502', 'step': 3919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:06.784020', 'step': 3919, 'epoch': 2} {'type': 'loss', 'content': 0.0008839900838211179, 'timestamp': '2025-09-10 02:24:06.811653', 'step': 3920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:06.843427', 'step': 3920, 'epoch': 2} {'type': 'loss', 'content': 0.0010170344030484557, 'timestamp': '2025-09-10 02:24:06.845414', 'step': 3921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:06.876709', 'step': 3921, 'epoch': 2} {'type': 'loss', 'content': 0.000293319666525349, 'timestamp': '2025-09-10 02:24:06.880635', 'step': 3922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:06.911713', 'step': 3922, 'epoch': 2} {'type': 'loss', 'content': 0.001344728167168796, 'timestamp': '2025-09-10 02:24:06.918603', 'step': 3923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:06.952454', 'step': 3923, 'epoch': 2} {'type': 'loss', 'content': 0.00218668463639915, 'timestamp': '2025-09-10 02:24:06.983868', 'step': 3924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:07.014758', 'step': 3924, 'epoch': 2} {'type': 'loss', 'content': 0.008733495138585567, 'timestamp': '2025-09-10 02:24:07.019956', 'step': 3925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:24:07.053619', 'step': 3925, 'epoch': 2} {'type': 'loss', 'content': 0.000639695324935019, 'timestamp': '2025-09-10 02:24:07.067069', 'step': 3926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:07.098065', 'step': 3926, 'epoch': 2} {'type': 'loss', 'content': 0.0011399161303415895, 'timestamp': '2025-09-10 02:24:07.100522', 'step': 3927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:07.131720', 'step': 3927, 'epoch': 2} {'type': 'loss', 'content': 0.0007331220549531281, 'timestamp': '2025-09-10 02:24:07.160224', 'step': 3928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:24:07.192773', 'step': 3928, 'epoch': 2} {'type': 'loss', 'content': 0.00026727074873633683, 'timestamp': '2025-09-10 02:24:07.205833', 'step': 3929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:07.237355', 'step': 3929, 'epoch': 2} {'type': 'loss', 'content': 0.0014935116050764918, 'timestamp': '2025-09-10 02:24:07.249171', 'step': 3930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:07.279958', 'step': 3930, 'epoch': 2} {'type': 'loss', 'content': 0.0003637108893599361, 'timestamp': '2025-09-10 02:24:07.284267', 'step': 3931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:07.315434', 'step': 3931, 'epoch': 2} {'type': 'loss', 'content': 0.00031519282492809, 'timestamp': '2025-09-10 02:24:07.340156', 'step': 3932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:07.372778', 'step': 3932, 'epoch': 2} {'type': 'loss', 'content': 0.001294466550461948, 'timestamp': '2025-09-10 02:24:07.376862', 'step': 3933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:07.408613', 'step': 3933, 'epoch': 2} {'type': 'loss', 'content': 0.0013076617615297437, 'timestamp': '2025-09-10 02:24:07.412877', 'step': 3934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:07.443775', 'step': 3934, 'epoch': 2} {'type': 'loss', 'content': 0.0009365587611682713, 'timestamp': '2025-09-10 02:24:07.450806', 'step': 3935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:24:07.481976', 'step': 3935, 'epoch': 2} {'type': 'loss', 'content': 0.000639063015114516, 'timestamp': '2025-09-10 02:24:07.506020', 'step': 3936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:07.536530', 'step': 3936, 'epoch': 2} {'type': 'loss', 'content': 0.008496752008795738, 'timestamp': '2025-09-10 02:24:07.541316', 'step': 3937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:07.572193', 'step': 3937, 'epoch': 2} {'type': 'loss', 'content': 0.0006357203237712383, 'timestamp': '2025-09-10 02:24:07.579907', 'step': 3938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:07.610407', 'step': 3938, 'epoch': 2} {'type': 'loss', 'content': 0.00021839377586729825, 'timestamp': '2025-09-10 02:24:07.617289', 'step': 3939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:07.648094', 'step': 3939, 'epoch': 2} {'type': 'loss', 'content': 0.0003433347155805677, 'timestamp': '2025-09-10 02:24:07.681085', 'step': 3940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:07.712244', 'step': 3940, 'epoch': 2} {'type': 'loss', 'content': 0.0004599998064804822, 'timestamp': '2025-09-10 02:24:07.716927', 'step': 3941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:07.747344', 'step': 3941, 'epoch': 2} {'type': 'loss', 'content': 0.008438892662525177, 'timestamp': '2025-09-10 02:24:07.758360', 'step': 3942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:07.789709', 'step': 3942, 'epoch': 2} {'type': 'loss', 'content': 0.0013779483269900084, 'timestamp': '2025-09-10 02:24:07.802315', 'step': 3943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:07.832842', 'step': 3943, 'epoch': 2} {'type': 'loss', 'content': 0.0002914096985477954, 'timestamp': '2025-09-10 02:24:07.860587', 'step': 3944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:07.894923', 'step': 3944, 'epoch': 2} {'type': 'loss', 'content': 0.0012282740790396929, 'timestamp': '2025-09-10 02:24:07.902646', 'step': 3945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:07.932838', 'step': 3945, 'epoch': 2} {'type': 'loss', 'content': 0.0001276891416637227, 'timestamp': '2025-09-10 02:24:07.940473', 'step': 3946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:07.970465', 'step': 3946, 'epoch': 2} {'type': 'loss', 'content': 0.006276755593717098, 'timestamp': '2025-09-10 02:24:07.981506', 'step': 3947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:08.012106', 'step': 3947, 'epoch': 2} {'type': 'loss', 'content': 0.0029240294825285673, 'timestamp': '2025-09-10 02:24:08.043281', 'step': 3948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:08.073563', 'step': 3948, 'epoch': 2} {'type': 'loss', 'content': 0.0004812986881006509, 'timestamp': '2025-09-10 02:24:08.079004', 'step': 3949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:24:08.108621', 'step': 3949, 'epoch': 2} {'type': 'loss', 'content': 0.00047114197514019907, 'timestamp': '2025-09-10 02:24:08.111228', 'step': 3950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:08.141972', 'step': 3950, 'epoch': 2} {'type': 'loss', 'content': 0.0006882947636768222, 'timestamp': '2025-09-10 02:24:08.154144', 'step': 3951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:08.184207', 'step': 3951, 'epoch': 2} {'type': 'loss', 'content': 0.003981906455010176, 'timestamp': '2025-09-10 02:24:08.212332', 'step': 3952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:08.243261', 'step': 3952, 'epoch': 2} {'type': 'loss', 'content': 0.0001539530057925731, 'timestamp': '2025-09-10 02:24:08.245687', 'step': 3953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:08.276533', 'step': 3953, 'epoch': 2} {'type': 'loss', 'content': 0.001202300889417529, 'timestamp': '2025-09-10 02:24:08.286904', 'step': 3954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:08.317017', 'step': 3954, 'epoch': 2} {'type': 'loss', 'content': 0.0010202035773545504, 'timestamp': '2025-09-10 02:24:08.327013', 'step': 3955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:08.357903', 'step': 3955, 'epoch': 2} {'type': 'loss', 'content': 0.005083515774458647, 'timestamp': '2025-09-10 02:24:08.386110', 'step': 3956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:24:08.416923', 'step': 3956, 'epoch': 2} {'type': 'loss', 'content': 0.00014192526577971876, 'timestamp': '2025-09-10 02:24:08.429592', 'step': 3957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:08.458783', 'step': 3957, 'epoch': 2} {'type': 'loss', 'content': 0.0236373208463192, 'timestamp': '2025-09-10 02:24:08.463031', 'step': 3958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:08.493098', 'step': 3958, 'epoch': 2} {'type': 'loss', 'content': 0.004410702269524336, 'timestamp': '2025-09-10 02:24:08.505336', 'step': 3959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:08.536639', 'step': 3959, 'epoch': 2} {'type': 'loss', 'content': 0.0011517350794747472, 'timestamp': '2025-09-10 02:24:08.565282', 'step': 3960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:08.596232', 'step': 3960, 'epoch': 2} {'type': 'loss', 'content': 0.0006834971136413515, 'timestamp': '2025-09-10 02:24:08.606704', 'step': 3961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:08.637610', 'step': 3961, 'epoch': 2} {'type': 'loss', 'content': 0.00012764804705511779, 'timestamp': '2025-09-10 02:24:08.644426', 'step': 3962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:08.678508', 'step': 3962, 'epoch': 2} {'type': 'loss', 'content': 0.0006874548853375018, 'timestamp': '2025-09-10 02:24:08.685403', 'step': 3963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:08.716274', 'step': 3963, 'epoch': 2} {'type': 'loss', 'content': 0.0035995282232761383, 'timestamp': '2025-09-10 02:24:08.744389', 'step': 3964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:08.777692', 'step': 3964, 'epoch': 2} {'type': 'loss', 'content': 0.014470146968960762, 'timestamp': '2025-09-10 02:24:08.782058', 'step': 3965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:08.813486', 'step': 3965, 'epoch': 2} {'type': 'loss', 'content': 0.019030440598726273, 'timestamp': '2025-09-10 02:24:08.817956', 'step': 3966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:08.848437', 'step': 3966, 'epoch': 2} {'type': 'loss', 'content': 0.0006014609825797379, 'timestamp': '2025-09-10 02:24:08.855440', 'step': 3967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:08.887013', 'step': 3967, 'epoch': 2} {'type': 'loss', 'content': 0.0009079644805751741, 'timestamp': '2025-09-10 02:24:08.915256', 'step': 3968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:08.946655', 'step': 3968, 'epoch': 2} {'type': 'loss', 'content': 0.0007088962593115866, 'timestamp': '2025-09-10 02:24:08.951854', 'step': 3969, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:24:19.403820', 'step': 3969, 'epoch': 2} {'type': 'pplx', 'content': 22799844.439538065, 'timestamp': '2025-09-10 02:24:19.408402', 'step': 3969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:24:19.442139', 'step': 3969, 'epoch': 2} {'type': 'loss', 'content': 0.0003047510690521449, 'timestamp': '2025-09-10 02:24:19.455828', 'step': 3970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:24:19.495658', 'step': 3970, 'epoch': 2} {'type': 'loss', 'content': 0.005057158879935741, 'timestamp': '2025-09-10 02:24:19.511583', 'step': 3971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:19.544202', 'step': 3971, 'epoch': 2} {'type': 'loss', 'content': 0.008785208687186241, 'timestamp': '2025-09-10 02:24:19.572213', 'step': 3972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:19.610162', 'step': 3972, 'epoch': 2} {'type': 'loss', 'content': 0.006338917650282383, 'timestamp': '2025-09-10 02:24:19.617256', 'step': 3973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:19.648959', 'step': 3973, 'epoch': 2} {'type': 'loss', 'content': 0.0006893486715853214, 'timestamp': '2025-09-10 02:24:19.655825', 'step': 3974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:19.692474', 'step': 3974, 'epoch': 2} {'type': 'loss', 'content': 0.0003333700296934694, 'timestamp': '2025-09-10 02:24:19.697017', 'step': 3975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:19.727698', 'step': 3975, 'epoch': 2} {'type': 'loss', 'content': 8.453882037429139e-05, 'timestamp': '2025-09-10 02:24:19.760764', 'step': 3976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:19.791106', 'step': 3976, 'epoch': 2} {'type': 'loss', 'content': 0.000572329037822783, 'timestamp': '2025-09-10 02:24:19.795706', 'step': 3977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:19.826261', 'step': 3977, 'epoch': 2} {'type': 'loss', 'content': 0.02135993354022503, 'timestamp': '2025-09-10 02:24:19.834036', 'step': 3978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:19.864596', 'step': 3978, 'epoch': 2} {'type': 'loss', 'content': 0.009581172838807106, 'timestamp': '2025-09-10 02:24:19.877140', 'step': 3979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:19.907721', 'step': 3979, 'epoch': 2} {'type': 'loss', 'content': 0.009216717444360256, 'timestamp': '2025-09-10 02:24:19.936432', 'step': 3980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:19.965471', 'step': 3980, 'epoch': 2} {'type': 'loss', 'content': 0.0003136309387627989, 'timestamp': '2025-09-10 02:24:19.970967', 'step': 3981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:20.001248', 'step': 3981, 'epoch': 2} {'type': 'loss', 'content': 0.002166020916774869, 'timestamp': '2025-09-10 02:24:20.008799', 'step': 3982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:20.044066', 'step': 3982, 'epoch': 2} {'type': 'loss', 'content': 0.0010525870602577925, 'timestamp': '2025-09-10 02:24:20.056313', 'step': 3983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:20.086574', 'step': 3983, 'epoch': 2} {'type': 'loss', 'content': 0.0007074028253555298, 'timestamp': '2025-09-10 02:24:20.114464', 'step': 3984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:20.149163', 'step': 3984, 'epoch': 2} {'type': 'loss', 'content': 0.017534593120217323, 'timestamp': '2025-09-10 02:24:20.151241', 'step': 3985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:20.182337', 'step': 3985, 'epoch': 2} {'type': 'loss', 'content': 0.0005388972931541502, 'timestamp': '2025-09-10 02:24:20.189384', 'step': 3986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:20.219925', 'step': 3986, 'epoch': 2} {'type': 'loss', 'content': 0.020015867426991463, 'timestamp': '2025-09-10 02:24:20.230202', 'step': 3987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:20.265735', 'step': 3987, 'epoch': 2} {'type': 'loss', 'content': 0.0004713798116426915, 'timestamp': '2025-09-10 02:24:20.290709', 'step': 3988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:20.323084', 'step': 3988, 'epoch': 2} {'type': 'loss', 'content': 0.0058451988734304905, 'timestamp': '2025-09-10 02:24:20.330695', 'step': 3989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:20.363663', 'step': 3989, 'epoch': 2} {'type': 'loss', 'content': 0.026393314823508263, 'timestamp': '2025-09-10 02:24:20.370585', 'step': 3990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:20.413538', 'step': 3990, 'epoch': 2} {'type': 'loss', 'content': 0.0007539827493019402, 'timestamp': '2025-09-10 02:24:20.423882', 'step': 3991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:20.470114', 'step': 3991, 'epoch': 2} {'type': 'loss', 'content': 0.013442503288388252, 'timestamp': '2025-09-10 02:24:20.495116', 'step': 3992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:20.526812', 'step': 3992, 'epoch': 2} {'type': 'loss', 'content': 0.0014537216629832983, 'timestamp': '2025-09-10 02:24:20.529200', 'step': 3993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:20.560254', 'step': 3993, 'epoch': 2} {'type': 'loss', 'content': 0.0008240799652412534, 'timestamp': '2025-09-10 02:24:20.568128', 'step': 3994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:20.599170', 'step': 3994, 'epoch': 2} {'type': 'loss', 'content': 0.0015933796530589461, 'timestamp': '2025-09-10 02:24:20.603231', 'step': 3995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:20.633342', 'step': 3995, 'epoch': 2} {'type': 'loss', 'content': 0.03594691678881645, 'timestamp': '2025-09-10 02:24:20.658890', 'step': 3996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:20.689281', 'step': 3996, 'epoch': 2} {'type': 'loss', 'content': 0.000319391256198287, 'timestamp': '2025-09-10 02:24:20.694555', 'step': 3997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:20.724910', 'step': 3997, 'epoch': 2} {'type': 'loss', 'content': 0.0003305670979898423, 'timestamp': '2025-09-10 02:24:20.735146', 'step': 3998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:20.764227', 'step': 3998, 'epoch': 2} {'type': 'loss', 'content': 8.796909969532862e-05, 'timestamp': '2025-09-10 02:24:20.771237', 'step': 3999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:20.800926', 'step': 3999, 'epoch': 2} {'type': 'loss', 'content': 0.00020751934789586812, 'timestamp': '2025-09-10 02:24:20.833887', 'step': 4000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 4000', 'timestamp': '2025-09-10 02:24:25.468790', 'step': 4000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:25.512847', 'step': 4000, 'epoch': 2} {'type': 'loss', 'content': 0.0006269075674936175, 'timestamp': '2025-09-10 02:24:25.516078', 'step': 4001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:25.549501', 'step': 4001, 'epoch': 2} {'type': 'loss', 'content': 0.00019980034267064184, 'timestamp': '2025-09-10 02:24:25.555522', 'step': 4002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:25.589132', 'step': 4002, 'epoch': 2} {'type': 'loss', 'content': 0.003651339327916503, 'timestamp': '2025-09-10 02:24:25.595903', 'step': 4003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:25.628240', 'step': 4003, 'epoch': 2} {'type': 'loss', 'content': 0.0007770135416649282, 'timestamp': '2025-09-10 02:24:25.656161', 'step': 4004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:24:25.690389', 'step': 4004, 'epoch': 2} {'type': 'loss', 'content': 0.0008123559528030455, 'timestamp': '2025-09-10 02:24:25.703360', 'step': 4005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:25.737137', 'step': 4005, 'epoch': 2} {'type': 'loss', 'content': 0.01008316408842802, 'timestamp': '2025-09-10 02:24:25.741004', 'step': 4006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:25.773651', 'step': 4006, 'epoch': 2} {'type': 'loss', 'content': 0.004857528023421764, 'timestamp': '2025-09-10 02:24:25.781095', 'step': 4007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:25.814355', 'step': 4007, 'epoch': 2} {'type': 'loss', 'content': 0.0003992785350419581, 'timestamp': '2025-09-10 02:24:25.842407', 'step': 4008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:25.876193', 'step': 4008, 'epoch': 2} {'type': 'loss', 'content': 0.000981758115813136, 'timestamp': '2025-09-10 02:24:25.881199', 'step': 4009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:24:25.921274', 'step': 4009, 'epoch': 2} {'type': 'loss', 'content': 0.0010118504287675023, 'timestamp': '2025-09-10 02:24:25.936833', 'step': 4010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:25.979672', 'step': 4010, 'epoch': 2} {'type': 'loss', 'content': 0.0013079562922939658, 'timestamp': '2025-09-10 02:24:25.986995', 'step': 4011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:26.022946', 'step': 4011, 'epoch': 2} {'type': 'loss', 'content': 0.016620881855487823, 'timestamp': '2025-09-10 02:24:26.050856', 'step': 4012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:26.083657', 'step': 4012, 'epoch': 2} {'type': 'loss', 'content': 0.05415716394782066, 'timestamp': '2025-09-10 02:24:26.087661', 'step': 4013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:24:26.128422', 'step': 4013, 'epoch': 2} {'type': 'loss', 'content': 0.0009211709839291871, 'timestamp': '2025-09-10 02:24:26.144033', 'step': 4014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:24:26.187373', 'step': 4014, 'epoch': 2} {'type': 'loss', 'content': 0.0036754843313246965, 'timestamp': '2025-09-10 02:24:26.204433', 'step': 4015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:26.238695', 'step': 4015, 'epoch': 2} {'type': 'loss', 'content': 0.0007402479532174766, 'timestamp': '2025-09-10 02:24:26.267096', 'step': 4016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:26.298194', 'step': 4016, 'epoch': 2} {'type': 'loss', 'content': 0.0003799795522354543, 'timestamp': '2025-09-10 02:24:26.302393', 'step': 4017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:26.335985', 'step': 4017, 'epoch': 2} {'type': 'loss', 'content': 0.0001108443975681439, 'timestamp': '2025-09-10 02:24:26.348368', 'step': 4018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:26.385532', 'step': 4018, 'epoch': 2} {'type': 'loss', 'content': 0.007017719559371471, 'timestamp': '2025-09-10 02:24:26.398073', 'step': 4019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 14712978242368}, 'timestamp': '2025-09-10 02:24:26.444594', 'step': 4019, 'epoch': 2} {'type': 'loss', 'content': 0.021222028881311417, 'timestamp': '2025-09-10 02:24:26.483027', 'step': 4020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:26.516785', 'step': 4020, 'epoch': 2} {'type': 'loss', 'content': 0.0004138918302487582, 'timestamp': '2025-09-10 02:24:26.526789', 'step': 4021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:26.559474', 'step': 4021, 'epoch': 2} {'type': 'loss', 'content': 0.011343798600137234, 'timestamp': '2025-09-10 02:24:26.566101', 'step': 4022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:26.600887', 'step': 4022, 'epoch': 2} {'type': 'loss', 'content': 0.007483073975890875, 'timestamp': '2025-09-10 02:24:26.604769', 'step': 4023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:26.638117', 'step': 4023, 'epoch': 2} {'type': 'loss', 'content': 0.003005236154422164, 'timestamp': '2025-09-10 02:24:26.662996', 'step': 4024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:26.695391', 'step': 4024, 'epoch': 2} {'type': 'loss', 'content': 0.001247288309969008, 'timestamp': '2025-09-10 02:24:26.705059', 'step': 4025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:26.738860', 'step': 4025, 'epoch': 2} {'type': 'loss', 'content': 0.006834504660218954, 'timestamp': '2025-09-10 02:24:26.749292', 'step': 4026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:26.785078', 'step': 4026, 'epoch': 2} {'type': 'loss', 'content': 0.013955286704003811, 'timestamp': '2025-09-10 02:24:26.791860', 'step': 4027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:26.825667', 'step': 4027, 'epoch': 2} {'type': 'loss', 'content': 0.0021858804393559694, 'timestamp': '2025-09-10 02:24:26.850910', 'step': 4028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:26.883376', 'step': 4028, 'epoch': 2} {'type': 'loss', 'content': 0.00010486682003829628, 'timestamp': '2025-09-10 02:24:26.888564', 'step': 4029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:26.924331', 'step': 4029, 'epoch': 2} {'type': 'loss', 'content': 0.00023303573834709823, 'timestamp': '2025-09-10 02:24:26.931664', 'step': 4030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:26.971160', 'step': 4030, 'epoch': 2} {'type': 'loss', 'content': 0.00035887552076019347, 'timestamp': '2025-09-10 02:24:26.981146', 'step': 4031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:27.012992', 'step': 4031, 'epoch': 2} {'type': 'loss', 'content': 0.0004720363358501345, 'timestamp': '2025-09-10 02:24:27.040899', 'step': 4032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:27.074417', 'step': 4032, 'epoch': 2} {'type': 'loss', 'content': 0.031031426042318344, 'timestamp': '2025-09-10 02:24:27.083836', 'step': 4033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:27.116891', 'step': 4033, 'epoch': 2} {'type': 'loss', 'content': 0.0011731393169611692, 'timestamp': '2025-09-10 02:24:27.123439', 'step': 4034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:27.156461', 'step': 4034, 'epoch': 2} {'type': 'loss', 'content': 0.0004593496269080788, 'timestamp': '2025-09-10 02:24:27.158888', 'step': 4035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:27.194989', 'step': 4035, 'epoch': 2} {'type': 'loss', 'content': 0.005191961769014597, 'timestamp': '2025-09-10 02:24:27.226408', 'step': 4036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:24:27.267895', 'step': 4036, 'epoch': 2} {'type': 'loss', 'content': 0.0007114148465916514, 'timestamp': '2025-09-10 02:24:27.283581', 'step': 4037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:27.317423', 'step': 4037, 'epoch': 2} {'type': 'loss', 'content': 0.00016225686704274267, 'timestamp': '2025-09-10 02:24:27.324612', 'step': 4038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:24:27.362907', 'step': 4038, 'epoch': 2} {'type': 'loss', 'content': 0.00895176362246275, 'timestamp': '2025-09-10 02:24:27.376886', 'step': 4039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:27.413372', 'step': 4039, 'epoch': 2} {'type': 'loss', 'content': 0.0012197830947116017, 'timestamp': '2025-09-10 02:24:27.438493', 'step': 4040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:24:27.475483', 'step': 4040, 'epoch': 2} {'type': 'loss', 'content': 0.00033447827445343137, 'timestamp': '2025-09-10 02:24:27.488606', 'step': 4041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:27.521034', 'step': 4041, 'epoch': 2} {'type': 'loss', 'content': 0.001729366136714816, 'timestamp': '2025-09-10 02:24:27.527787', 'step': 4042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:27.560417', 'step': 4042, 'epoch': 2} {'type': 'loss', 'content': 0.01877027377486229, 'timestamp': '2025-09-10 02:24:27.567838', 'step': 4043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:27.600247', 'step': 4043, 'epoch': 2} {'type': 'loss', 'content': 0.0001404429494868964, 'timestamp': '2025-09-10 02:24:27.627831', 'step': 4044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:24:27.663723', 'step': 4044, 'epoch': 2} {'type': 'loss', 'content': 0.027306651696562767, 'timestamp': '2025-09-10 02:24:27.675805', 'step': 4045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:27.711995', 'step': 4045, 'epoch': 2} {'type': 'loss', 'content': 0.00820981990545988, 'timestamp': '2025-09-10 02:24:27.723866', 'step': 4046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:27.757447', 'step': 4046, 'epoch': 2} {'type': 'loss', 'content': 0.00019994494505226612, 'timestamp': '2025-09-10 02:24:27.767386', 'step': 4047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:27.800142', 'step': 4047, 'epoch': 2} {'type': 'loss', 'content': 0.0016157986829057336, 'timestamp': '2025-09-10 02:24:27.825392', 'step': 4048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:27.857196', 'step': 4048, 'epoch': 2} {'type': 'loss', 'content': 0.0032153644133359194, 'timestamp': '2025-09-10 02:24:27.862081', 'step': 4049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:27.895807', 'step': 4049, 'epoch': 2} {'type': 'loss', 'content': 0.0009131658589467406, 'timestamp': '2025-09-10 02:24:27.902771', 'step': 4050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:27.938617', 'step': 4050, 'epoch': 2} {'type': 'loss', 'content': 0.002615003613755107, 'timestamp': '2025-09-10 02:24:27.941114', 'step': 4051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:27.972303', 'step': 4051, 'epoch': 2} {'type': 'loss', 'content': 0.0024026173632591963, 'timestamp': '2025-09-10 02:24:28.003897', 'step': 4052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:28.037982', 'step': 4052, 'epoch': 2} {'type': 'loss', 'content': 0.0002667165535967797, 'timestamp': '2025-09-10 02:24:28.045276', 'step': 4053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:28.078604', 'step': 4053, 'epoch': 2} {'type': 'loss', 'content': 0.0019048672402277589, 'timestamp': '2025-09-10 02:24:28.090526', 'step': 4054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:28.122157', 'step': 4054, 'epoch': 2} {'type': 'loss', 'content': 0.0007782382308505476, 'timestamp': '2025-09-10 02:24:28.129961', 'step': 4055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:24:28.166964', 'step': 4055, 'epoch': 2} {'type': 'loss', 'content': 0.01114829070866108, 'timestamp': '2025-09-10 02:24:28.201501', 'step': 4056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:24:28.245463', 'step': 4056, 'epoch': 2} {'type': 'loss', 'content': 0.0023074380587786436, 'timestamp': '2025-09-10 02:24:28.260862', 'step': 4057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:28.293857', 'step': 4057, 'epoch': 2} {'type': 'loss', 'content': 0.002955834148451686, 'timestamp': '2025-09-10 02:24:28.301123', 'step': 4058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:28.333365', 'step': 4058, 'epoch': 2} {'type': 'loss', 'content': 0.003895343979820609, 'timestamp': '2025-09-10 02:24:28.337305', 'step': 4059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:24:28.373184', 'step': 4059, 'epoch': 2} {'type': 'loss', 'content': 0.008619307540357113, 'timestamp': '2025-09-10 02:24:28.407381', 'step': 4060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:28.441969', 'step': 4060, 'epoch': 2} {'type': 'loss', 'content': 0.016522839665412903, 'timestamp': '2025-09-10 02:24:28.449041', 'step': 4061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:28.483243', 'step': 4061, 'epoch': 2} {'type': 'loss', 'content': 0.0006908263312652707, 'timestamp': '2025-09-10 02:24:28.492993', 'step': 4062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:28.527778', 'step': 4062, 'epoch': 2} {'type': 'loss', 'content': 0.0022160657681524754, 'timestamp': '2025-09-10 02:24:28.534885', 'step': 4063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:28.567164', 'step': 4063, 'epoch': 2} {'type': 'loss', 'content': 0.00819767639040947, 'timestamp': '2025-09-10 02:24:28.594498', 'step': 4064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:28.629854', 'step': 4064, 'epoch': 2} {'type': 'loss', 'content': 0.0025234988424926996, 'timestamp': '2025-09-10 02:24:28.639696', 'step': 4065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:28.671547', 'step': 4065, 'epoch': 2} {'type': 'loss', 'content': 0.0017039499944075942, 'timestamp': '2025-09-10 02:24:28.678953', 'step': 4066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:28.709858', 'step': 4066, 'epoch': 2} {'type': 'loss', 'content': 0.0045456611551344395, 'timestamp': '2025-09-10 02:24:28.720134', 'step': 4067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:28.752404', 'step': 4067, 'epoch': 2} {'type': 'loss', 'content': 0.05136652663350105, 'timestamp': '2025-09-10 02:24:28.780690', 'step': 4068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:28.811401', 'step': 4068, 'epoch': 2} {'type': 'loss', 'content': 0.0038302938919514418, 'timestamp': '2025-09-10 02:24:28.816041', 'step': 4069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:24:28.851839', 'step': 4069, 'epoch': 2} {'type': 'loss', 'content': 0.0043130056001245975, 'timestamp': '2025-09-10 02:24:28.865507', 'step': 4070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:28.896166', 'step': 4070, 'epoch': 2} {'type': 'loss', 'content': 0.004200483672320843, 'timestamp': '2025-09-10 02:24:28.903238', 'step': 4071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:28.935274', 'step': 4071, 'epoch': 2} {'type': 'loss', 'content': 0.0009043649188242853, 'timestamp': '2025-09-10 02:24:28.959569', 'step': 4072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:28.991351', 'step': 4072, 'epoch': 2} {'type': 'loss', 'content': 0.001501325867138803, 'timestamp': '2025-09-10 02:24:28.996915', 'step': 4073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:29.028148', 'step': 4073, 'epoch': 2} {'type': 'loss', 'content': 0.0002941501443274319, 'timestamp': '2025-09-10 02:24:29.030693', 'step': 4074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 15662185694400}, 'timestamp': '2025-09-10 02:24:29.076492', 'step': 4074, 'epoch': 2} {'type': 'loss', 'content': 0.001308751991018653, 'timestamp': '2025-09-10 02:24:29.095675', 'step': 4075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:29.125589', 'step': 4075, 'epoch': 2} {'type': 'loss', 'content': 0.0014235320268198848, 'timestamp': '2025-09-10 02:24:29.153553', 'step': 4076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:29.186944', 'step': 4076, 'epoch': 2} {'type': 'loss', 'content': 5.163023524801247e-05, 'timestamp': '2025-09-10 02:24:29.197445', 'step': 4077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:24:29.229513', 'step': 4077, 'epoch': 2} {'type': 'loss', 'content': 0.0026386440731585026, 'timestamp': '2025-09-10 02:24:29.231295', 'step': 4078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:29.262111', 'step': 4078, 'epoch': 2} {'type': 'loss', 'content': 0.0006315871723927557, 'timestamp': '2025-09-10 02:24:29.272626', 'step': 4079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:29.304174', 'step': 4079, 'epoch': 2} {'type': 'loss', 'content': 0.0019018551101908088, 'timestamp': '2025-09-10 02:24:29.329256', 'step': 4080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:24:29.363347', 'step': 4080, 'epoch': 2} {'type': 'loss', 'content': 0.0014254730194807053, 'timestamp': '2025-09-10 02:24:29.376636', 'step': 4081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:29.408930', 'step': 4081, 'epoch': 2} {'type': 'loss', 'content': 0.012064416892826557, 'timestamp': '2025-09-10 02:24:29.418881', 'step': 4082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:29.451248', 'step': 4082, 'epoch': 2} {'type': 'loss', 'content': 0.006986396852880716, 'timestamp': '2025-09-10 02:24:29.458324', 'step': 4083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:24:29.495905', 'step': 4083, 'epoch': 2} {'type': 'loss', 'content': 0.00028711804770864546, 'timestamp': '2025-09-10 02:24:29.530484', 'step': 4084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:29.562704', 'step': 4084, 'epoch': 2} {'type': 'loss', 'content': 0.054849933832883835, 'timestamp': '2025-09-10 02:24:29.567305', 'step': 4085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:29.596976', 'step': 4085, 'epoch': 2} {'type': 'loss', 'content': 0.015255759470164776, 'timestamp': '2025-09-10 02:24:29.603589', 'step': 4086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:29.634971', 'step': 4086, 'epoch': 2} {'type': 'loss', 'content': 0.00023272530233953148, 'timestamp': '2025-09-10 02:24:29.637246', 'step': 4087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:29.668012', 'step': 4087, 'epoch': 2} {'type': 'loss', 'content': 0.0014455585042014718, 'timestamp': '2025-09-10 02:24:29.691670', 'step': 4088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:29.722453', 'step': 4088, 'epoch': 2} {'type': 'loss', 'content': 0.08093362301588058, 'timestamp': '2025-09-10 02:24:29.726769', 'step': 4089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:24:29.766804', 'step': 4089, 'epoch': 2} {'type': 'loss', 'content': 0.05044776201248169, 'timestamp': '2025-09-10 02:24:29.782688', 'step': 4090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:29.814916', 'step': 4090, 'epoch': 2} {'type': 'loss', 'content': 0.004336885642260313, 'timestamp': '2025-09-10 02:24:29.818884', 'step': 4091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:29.850230', 'step': 4091, 'epoch': 2} {'type': 'loss', 'content': 0.012839260511100292, 'timestamp': '2025-09-10 02:24:29.878529', 'step': 4092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:29.910753', 'step': 4092, 'epoch': 2} {'type': 'loss', 'content': 0.0022947373799979687, 'timestamp': '2025-09-10 02:24:29.915492', 'step': 4093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:29.945821', 'step': 4093, 'epoch': 2} {'type': 'loss', 'content': 0.00331043335609138, 'timestamp': '2025-09-10 02:24:29.949679', 'step': 4094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:29.982228', 'step': 4094, 'epoch': 2} {'type': 'loss', 'content': 0.0018772233743220568, 'timestamp': '2025-09-10 02:24:29.988899', 'step': 4095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:24:30.022329', 'step': 4095, 'epoch': 2} {'type': 'loss', 'content': 0.0015633044531568885, 'timestamp': '2025-09-10 02:24:30.056674', 'step': 4096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:30.087499', 'step': 4096, 'epoch': 2} {'type': 'loss', 'content': 0.007612540386617184, 'timestamp': '2025-09-10 02:24:30.092839', 'step': 4097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:30.124846', 'step': 4097, 'epoch': 2} {'type': 'loss', 'content': 0.0009734017075970769, 'timestamp': '2025-09-10 02:24:30.132521', 'step': 4098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:30.167935', 'step': 4098, 'epoch': 2} {'type': 'loss', 'content': 0.0005414964980445802, 'timestamp': '2025-09-10 02:24:30.172166', 'step': 4099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 18035204324480}, 'timestamp': '2025-09-10 02:24:30.224308', 'step': 4099, 'epoch': 2} {'type': 'loss', 'content': 0.003065018681809306, 'timestamp': '2025-09-10 02:24:30.266654', 'step': 4100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:30.299544', 'step': 4100, 'epoch': 2} {'type': 'loss', 'content': 0.0015219785273075104, 'timestamp': '2025-09-10 02:24:30.301834', 'step': 4101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:30.335591', 'step': 4101, 'epoch': 2} {'type': 'loss', 'content': 0.0007919540512375534, 'timestamp': '2025-09-10 02:24:30.346183', 'step': 4102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:30.382001', 'step': 4102, 'epoch': 2} {'type': 'loss', 'content': 0.0004758323193527758, 'timestamp': '2025-09-10 02:24:30.385670', 'step': 4103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:30.420886', 'step': 4103, 'epoch': 2} {'type': 'loss', 'content': 0.001412588288076222, 'timestamp': '2025-09-10 02:24:30.445924', 'step': 4104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:30.479410', 'step': 4104, 'epoch': 2} {'type': 'loss', 'content': 0.0007647694437764585, 'timestamp': '2025-09-10 02:24:30.481760', 'step': 4105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:30.513060', 'step': 4105, 'epoch': 2} {'type': 'loss', 'content': 0.00031270290492102504, 'timestamp': '2025-09-10 02:24:30.515597', 'step': 4106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:30.547183', 'step': 4106, 'epoch': 2} {'type': 'loss', 'content': 0.0012159907491877675, 'timestamp': '2025-09-10 02:24:30.549834', 'step': 4107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 13289167064320}, 'timestamp': '2025-09-10 02:24:30.590714', 'step': 4107, 'epoch': 2} {'type': 'loss', 'content': 0.001639689551666379, 'timestamp': '2025-09-10 02:24:30.627898', 'step': 4108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:24:30.661074', 'step': 4108, 'epoch': 2} {'type': 'loss', 'content': 0.0044628409668803215, 'timestamp': '2025-09-10 02:24:30.674061', 'step': 4109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:30.705416', 'step': 4109, 'epoch': 2} {'type': 'loss', 'content': 0.0015483727911487222, 'timestamp': '2025-09-10 02:24:30.715405', 'step': 4110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:30.746478', 'step': 4110, 'epoch': 2} {'type': 'loss', 'content': 0.012858943082392216, 'timestamp': '2025-09-10 02:24:30.756300', 'step': 4111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:30.788159', 'step': 4111, 'epoch': 2} {'type': 'loss', 'content': 0.0005279368488118052, 'timestamp': '2025-09-10 02:24:30.816741', 'step': 4112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:30.848252', 'step': 4112, 'epoch': 2} {'type': 'loss', 'content': 0.001972366590052843, 'timestamp': '2025-09-10 02:24:30.852943', 'step': 4113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:30.884524', 'step': 4113, 'epoch': 2} {'type': 'loss', 'content': 0.0004011372511740774, 'timestamp': '2025-09-10 02:24:30.891184', 'step': 4114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:30.922970', 'step': 4114, 'epoch': 2} {'type': 'loss', 'content': 0.0024985368363559246, 'timestamp': '2025-09-10 02:24:30.926771', 'step': 4115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:30.957507', 'step': 4115, 'epoch': 2} {'type': 'loss', 'content': 0.0012376006925478578, 'timestamp': '2025-09-10 02:24:30.982801', 'step': 4116, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:24:41.499678', 'step': 4116, 'epoch': 2} {'type': 'pplx', 'content': 19906806.935294818, 'timestamp': '2025-09-10 02:24:41.520920', 'step': 4116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:41.580625', 'step': 4116, 'epoch': 2} {'type': 'loss', 'content': 0.0007578267832286656, 'timestamp': '2025-09-10 02:24:41.597958', 'step': 4117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:41.653595', 'step': 4117, 'epoch': 2} {'type': 'loss', 'content': 0.0012618020409718156, 'timestamp': '2025-09-10 02:24:41.657564', 'step': 4118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:41.698161', 'step': 4118, 'epoch': 2} {'type': 'loss', 'content': 0.0005286371451802552, 'timestamp': '2025-09-10 02:24:41.710022', 'step': 4119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:41.789569', 'step': 4119, 'epoch': 2} {'type': 'loss', 'content': 0.003075662301853299, 'timestamp': '2025-09-10 02:24:41.817807', 'step': 4120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:41.903531', 'step': 4120, 'epoch': 2} {'type': 'loss', 'content': 0.0006497717113234103, 'timestamp': '2025-09-10 02:24:41.921573', 'step': 4121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:42.001125', 'step': 4121, 'epoch': 2} {'type': 'loss', 'content': 0.0038573991041630507, 'timestamp': '2025-09-10 02:24:42.018713', 'step': 4122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:42.096979', 'step': 4122, 'epoch': 2} {'type': 'loss', 'content': 0.003996575251221657, 'timestamp': '2025-09-10 02:24:42.105445', 'step': 4123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:42.150019', 'step': 4123, 'epoch': 2} {'type': 'loss', 'content': 0.0023050843738019466, 'timestamp': '2025-09-10 02:24:42.180754', 'step': 4124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:42.215093', 'step': 4124, 'epoch': 2} {'type': 'loss', 'content': 0.0015655980678275228, 'timestamp': '2025-09-10 02:24:42.218567', 'step': 4125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:42.250888', 'step': 4125, 'epoch': 2} {'type': 'loss', 'content': 0.003783722873777151, 'timestamp': '2025-09-10 02:24:42.258287', 'step': 4126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:42.292363', 'step': 4126, 'epoch': 2} {'type': 'loss', 'content': 0.000846231181640178, 'timestamp': '2025-09-10 02:24:42.298745', 'step': 4127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:42.330802', 'step': 4127, 'epoch': 2} {'type': 'loss', 'content': 0.0018645375967025757, 'timestamp': '2025-09-10 02:24:42.358824', 'step': 4128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:42.391712', 'step': 4128, 'epoch': 2} {'type': 'loss', 'content': 0.0006940880557522178, 'timestamp': '2025-09-10 02:24:42.395966', 'step': 4129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:42.428546', 'step': 4129, 'epoch': 2} {'type': 'loss', 'content': 0.001143784262239933, 'timestamp': '2025-09-10 02:24:42.439938', 'step': 4130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:42.472405', 'step': 4130, 'epoch': 2} {'type': 'loss', 'content': 0.0018830805784091353, 'timestamp': '2025-09-10 02:24:42.478601', 'step': 4131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:42.510561', 'step': 4131, 'epoch': 2} {'type': 'loss', 'content': 0.0007161656394600868, 'timestamp': '2025-09-10 02:24:42.539023', 'step': 4132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:42.570242', 'step': 4132, 'epoch': 2} {'type': 'loss', 'content': 0.0023870845325291157, 'timestamp': '2025-09-10 02:24:42.574528', 'step': 4133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:24:42.607954', 'step': 4133, 'epoch': 2} {'type': 'loss', 'content': 0.01680189184844494, 'timestamp': '2025-09-10 02:24:42.621289', 'step': 4134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:42.652852', 'step': 4134, 'epoch': 2} {'type': 'loss', 'content': 0.0025117939803749323, 'timestamp': '2025-09-10 02:24:42.663506', 'step': 4135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:42.694081', 'step': 4135, 'epoch': 2} {'type': 'loss', 'content': 0.01693909242749214, 'timestamp': '2025-09-10 02:24:42.717805', 'step': 4136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:42.749274', 'step': 4136, 'epoch': 2} {'type': 'loss', 'content': 0.0008668032241985202, 'timestamp': '2025-09-10 02:24:42.757944', 'step': 4137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:24:42.794806', 'step': 4137, 'epoch': 2} {'type': 'loss', 'content': 0.0012921657180413604, 'timestamp': '2025-09-10 02:24:42.808844', 'step': 4138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:42.841435', 'step': 4138, 'epoch': 2} {'type': 'loss', 'content': 0.0010435592848807573, 'timestamp': '2025-09-10 02:24:42.851930', 'step': 4139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:42.883699', 'step': 4139, 'epoch': 2} {'type': 'loss', 'content': 0.005313398782163858, 'timestamp': '2025-09-10 02:24:42.908438', 'step': 4140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:24:42.945631', 'step': 4140, 'epoch': 2} {'type': 'loss', 'content': 0.009342093952000141, 'timestamp': '2025-09-10 02:24:42.960811', 'step': 4141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:42.992178', 'step': 4141, 'epoch': 2} {'type': 'loss', 'content': 0.0005205129855312407, 'timestamp': '2025-09-10 02:24:43.003346', 'step': 4142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:43.033595', 'step': 4142, 'epoch': 2} {'type': 'loss', 'content': 0.0024474586825817823, 'timestamp': '2025-09-10 02:24:43.036392', 'step': 4143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:43.067860', 'step': 4143, 'epoch': 2} {'type': 'loss', 'content': 0.00045305112143978477, 'timestamp': '2025-09-10 02:24:43.095054', 'step': 4144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:24:43.125624', 'step': 4144, 'epoch': 2} {'type': 'loss', 'content': 0.0028814957477152348, 'timestamp': '2025-09-10 02:24:43.127838', 'step': 4145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:43.158326', 'step': 4145, 'epoch': 2} {'type': 'loss', 'content': 0.000609158945735544, 'timestamp': '2025-09-10 02:24:43.165151', 'step': 4146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:43.195363', 'step': 4146, 'epoch': 2} {'type': 'loss', 'content': 0.0028953743167221546, 'timestamp': '2025-09-10 02:24:43.199436', 'step': 4147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:24:43.237345', 'step': 4147, 'epoch': 2} {'type': 'loss', 'content': 0.0039150347001850605, 'timestamp': '2025-09-10 02:24:43.273804', 'step': 4148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:43.304696', 'step': 4148, 'epoch': 2} {'type': 'loss', 'content': 0.001472481875680387, 'timestamp': '2025-09-10 02:24:43.312523', 'step': 4149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:24:43.348228', 'step': 4149, 'epoch': 2} {'type': 'loss', 'content': 0.0046607027761638165, 'timestamp': '2025-09-10 02:24:43.362171', 'step': 4150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:43.392806', 'step': 4150, 'epoch': 2} {'type': 'loss', 'content': 0.005251821596175432, 'timestamp': '2025-09-10 02:24:43.399769', 'step': 4151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:43.431584', 'step': 4151, 'epoch': 2} {'type': 'loss', 'content': 0.0006825706223025918, 'timestamp': '2025-09-10 02:24:43.456710', 'step': 4152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:43.486943', 'step': 4152, 'epoch': 2} {'type': 'loss', 'content': 0.008997195400297642, 'timestamp': '2025-09-10 02:24:43.489487', 'step': 4153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:43.520251', 'step': 4153, 'epoch': 2} {'type': 'loss', 'content': 0.00021450709027703851, 'timestamp': '2025-09-10 02:24:43.524736', 'step': 4154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:43.556031', 'step': 4154, 'epoch': 2} {'type': 'loss', 'content': 0.0032714589033275843, 'timestamp': '2025-09-10 02:24:43.563521', 'step': 4155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:43.598714', 'step': 4155, 'epoch': 2} {'type': 'loss', 'content': 0.0018134496640414, 'timestamp': '2025-09-10 02:24:43.626447', 'step': 4156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:43.661551', 'step': 4156, 'epoch': 2} {'type': 'loss', 'content': 0.0035523748956620693, 'timestamp': '2025-09-10 02:24:43.671258', 'step': 4157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:24:43.711589', 'step': 4157, 'epoch': 2} {'type': 'loss', 'content': 0.026274150237441063, 'timestamp': '2025-09-10 02:24:43.724961', 'step': 4158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:24:43.759358', 'step': 4158, 'epoch': 2} {'type': 'loss', 'content': 0.0005512969219125807, 'timestamp': '2025-09-10 02:24:43.772658', 'step': 4159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:43.803614', 'step': 4159, 'epoch': 2} {'type': 'loss', 'content': 0.00017312598356511444, 'timestamp': '2025-09-10 02:24:43.827260', 'step': 4160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:43.857429', 'step': 4160, 'epoch': 2} {'type': 'loss', 'content': 0.0010247546015307307, 'timestamp': '2025-09-10 02:24:43.860571', 'step': 4161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:43.890887', 'step': 4161, 'epoch': 2} {'type': 'loss', 'content': 0.02129758708178997, 'timestamp': '2025-09-10 02:24:43.895411', 'step': 4162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:43.929362', 'step': 4162, 'epoch': 2} {'type': 'loss', 'content': 0.00029126249137334526, 'timestamp': '2025-09-10 02:24:43.938239', 'step': 4163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:24:43.981123', 'step': 4163, 'epoch': 2} {'type': 'loss', 'content': 0.000328995258314535, 'timestamp': '2025-09-10 02:24:44.015235', 'step': 4164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:44.045898', 'step': 4164, 'epoch': 2} {'type': 'loss', 'content': 0.00022505922242999077, 'timestamp': '2025-09-10 02:24:44.047935', 'step': 4165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:44.079952', 'step': 4165, 'epoch': 2} {'type': 'loss', 'content': 0.0010929142590612173, 'timestamp': '2025-09-10 02:24:44.090664', 'step': 4166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:44.122893', 'step': 4166, 'epoch': 2} {'type': 'loss', 'content': 0.000381884427042678, 'timestamp': '2025-09-10 02:24:44.132234', 'step': 4167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:44.164070', 'step': 4167, 'epoch': 2} {'type': 'loss', 'content': 0.008006826043128967, 'timestamp': '2025-09-10 02:24:44.188101', 'step': 4168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:44.219173', 'step': 4168, 'epoch': 2} {'type': 'loss', 'content': 0.0003847822081297636, 'timestamp': '2025-09-10 02:24:44.228953', 'step': 4169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:44.259640', 'step': 4169, 'epoch': 2} {'type': 'loss', 'content': 0.002151952590793371, 'timestamp': '2025-09-10 02:24:44.266362', 'step': 4170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:44.296706', 'step': 4170, 'epoch': 2} {'type': 'loss', 'content': 0.0007263789302669466, 'timestamp': '2025-09-10 02:24:44.306932', 'step': 4171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:44.337993', 'step': 4171, 'epoch': 2} {'type': 'loss', 'content': 0.014661334455013275, 'timestamp': '2025-09-10 02:24:44.370694', 'step': 4172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:24:44.401986', 'step': 4172, 'epoch': 2} {'type': 'loss', 'content': 0.001283987076021731, 'timestamp': '2025-09-10 02:24:44.407582', 'step': 4173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:24:44.442184', 'step': 4173, 'epoch': 2} {'type': 'loss', 'content': 0.0033758750651031733, 'timestamp': '2025-09-10 02:24:44.455922', 'step': 4174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:44.499085', 'step': 4174, 'epoch': 2} {'type': 'loss', 'content': 0.0006651729927398264, 'timestamp': '2025-09-10 02:24:44.508438', 'step': 4175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:44.552371', 'step': 4175, 'epoch': 2} {'type': 'loss', 'content': 0.022680295631289482, 'timestamp': '2025-09-10 02:24:44.579998', 'step': 4176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:44.621199', 'step': 4176, 'epoch': 2} {'type': 'loss', 'content': 0.005751411896198988, 'timestamp': '2025-09-10 02:24:44.629067', 'step': 4177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:44.675082', 'step': 4177, 'epoch': 2} {'type': 'loss', 'content': 0.00028175374609418213, 'timestamp': '2025-09-10 02:24:44.681997', 'step': 4178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:24:44.728441', 'step': 4178, 'epoch': 2} {'type': 'loss', 'content': 0.00025079899933189154, 'timestamp': '2025-09-10 02:24:44.742394', 'step': 4179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:44.781232', 'step': 4179, 'epoch': 2} {'type': 'loss', 'content': 0.002579400083050132, 'timestamp': '2025-09-10 02:24:44.808965', 'step': 4180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:44.848373', 'step': 4180, 'epoch': 2} {'type': 'loss', 'content': 0.0009939942974597216, 'timestamp': '2025-09-10 02:24:44.858729', 'step': 4181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:44.892789', 'step': 4181, 'epoch': 2} {'type': 'loss', 'content': 0.00024698293418623507, 'timestamp': '2025-09-10 02:24:44.899796', 'step': 4182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:44.933836', 'step': 4182, 'epoch': 2} {'type': 'loss', 'content': 0.015613092109560966, 'timestamp': '2025-09-10 02:24:44.941548', 'step': 4183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:44.974017', 'step': 4183, 'epoch': 2} {'type': 'loss', 'content': 0.0022488494869321585, 'timestamp': '2025-09-10 02:24:45.001793', 'step': 4184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:24:45.035497', 'step': 4184, 'epoch': 2} {'type': 'loss', 'content': 0.0015916310949251056, 'timestamp': '2025-09-10 02:24:45.037589', 'step': 4185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:45.068522', 'step': 4185, 'epoch': 2} {'type': 'loss', 'content': 0.0012442750157788396, 'timestamp': '2025-09-10 02:24:45.075681', 'step': 4186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:45.107142', 'step': 4186, 'epoch': 2} {'type': 'loss', 'content': 0.00012972517288289964, 'timestamp': '2025-09-10 02:24:45.117217', 'step': 4187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:45.148182', 'step': 4187, 'epoch': 2} {'type': 'loss', 'content': 0.00011532863572938368, 'timestamp': '2025-09-10 02:24:45.175919', 'step': 4188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:45.207610', 'step': 4188, 'epoch': 2} {'type': 'loss', 'content': 0.0004722306621260941, 'timestamp': '2025-09-10 02:24:45.209939', 'step': 4189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:45.240550', 'step': 4189, 'epoch': 2} {'type': 'loss', 'content': 0.0006129414541646838, 'timestamp': '2025-09-10 02:24:45.244713', 'step': 4190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:45.277997', 'step': 4190, 'epoch': 2} {'type': 'loss', 'content': 0.016751401126384735, 'timestamp': '2025-09-10 02:24:45.285710', 'step': 4191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:45.317813', 'step': 4191, 'epoch': 2} {'type': 'loss', 'content': 0.0004313217068556696, 'timestamp': '2025-09-10 02:24:45.346485', 'step': 4192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:45.379053', 'step': 4192, 'epoch': 2} {'type': 'loss', 'content': 0.0681036114692688, 'timestamp': '2025-09-10 02:24:45.387056', 'step': 4193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:45.422109', 'step': 4193, 'epoch': 2} {'type': 'loss', 'content': 0.0021573789417743683, 'timestamp': '2025-09-10 02:24:45.429574', 'step': 4194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:45.462679', 'step': 4194, 'epoch': 2} {'type': 'loss', 'content': 0.00015258920029737055, 'timestamp': '2025-09-10 02:24:45.474668', 'step': 4195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:45.505846', 'step': 4195, 'epoch': 2} {'type': 'loss', 'content': 0.0043427967466413975, 'timestamp': '2025-09-10 02:24:45.533466', 'step': 4196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:45.564991', 'step': 4196, 'epoch': 2} {'type': 'loss', 'content': 6.901784217916429e-05, 'timestamp': '2025-09-10 02:24:45.574286', 'step': 4197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:45.605121', 'step': 4197, 'epoch': 2} {'type': 'loss', 'content': 0.006661687511950731, 'timestamp': '2025-09-10 02:24:45.609203', 'step': 4198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:45.642559', 'step': 4198, 'epoch': 2} {'type': 'loss', 'content': 0.00025688271853141487, 'timestamp': '2025-09-10 02:24:45.650157', 'step': 4199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:45.681105', 'step': 4199, 'epoch': 2} {'type': 'loss', 'content': 0.010232685133814812, 'timestamp': '2025-09-10 02:24:45.709890', 'step': 4200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:45.743640', 'step': 4200, 'epoch': 2} {'type': 'loss', 'content': 0.017913201823830605, 'timestamp': '2025-09-10 02:24:45.749224', 'step': 4201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:45.783408', 'step': 4201, 'epoch': 2} {'type': 'loss', 'content': 0.000634572294075042, 'timestamp': '2025-09-10 02:24:45.790563', 'step': 4202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:45.823229', 'step': 4202, 'epoch': 2} {'type': 'loss', 'content': 0.001113194739446044, 'timestamp': '2025-09-10 02:24:45.830989', 'step': 4203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:45.862907', 'step': 4203, 'epoch': 2} {'type': 'loss', 'content': 4.078313577338122e-05, 'timestamp': '2025-09-10 02:24:45.894873', 'step': 4204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:24:45.927700', 'step': 4204, 'epoch': 2} {'type': 'loss', 'content': 7.009686669334769e-05, 'timestamp': '2025-09-10 02:24:45.940748', 'step': 4205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:45.971903', 'step': 4205, 'epoch': 2} {'type': 'loss', 'content': 0.013100274838507175, 'timestamp': '2025-09-10 02:24:45.978878', 'step': 4206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:46.010401', 'step': 4206, 'epoch': 2} {'type': 'loss', 'content': 0.016267575323581696, 'timestamp': '2025-09-10 02:24:46.017892', 'step': 4207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:46.049063', 'step': 4207, 'epoch': 2} {'type': 'loss', 'content': 0.00022457198065239936, 'timestamp': '2025-09-10 02:24:46.077377', 'step': 4208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:24:46.115376', 'step': 4208, 'epoch': 2} {'type': 'loss', 'content': 0.0004984191036783159, 'timestamp': '2025-09-10 02:24:46.130853', 'step': 4209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 848], 'flops': 25154260214720}, 'timestamp': '2025-09-10 02:24:46.202347', 'step': 4209, 'epoch': 2} {'type': 'loss', 'content': 0.0003745494468603283, 'timestamp': '2025-09-10 02:24:46.231818', 'step': 4210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:46.263380', 'step': 4210, 'epoch': 2} {'type': 'loss', 'content': 0.00043498026207089424, 'timestamp': '2025-09-10 02:24:46.275916', 'step': 4211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:46.306859', 'step': 4211, 'epoch': 2} {'type': 'loss', 'content': 0.00025909944088198245, 'timestamp': '2025-09-10 02:24:46.331627', 'step': 4212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:46.362621', 'step': 4212, 'epoch': 2} {'type': 'loss', 'content': 0.0015551492106169462, 'timestamp': '2025-09-10 02:24:46.365636', 'step': 4213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:46.396632', 'step': 4213, 'epoch': 2} {'type': 'loss', 'content': 0.0015605135122314095, 'timestamp': '2025-09-10 02:24:46.406815', 'step': 4214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:46.440241', 'step': 4214, 'epoch': 2} {'type': 'loss', 'content': 0.020354004576802254, 'timestamp': '2025-09-10 02:24:46.447335', 'step': 4215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:24:46.481626', 'step': 4215, 'epoch': 2} {'type': 'loss', 'content': 0.033257655799388885, 'timestamp': '2025-09-10 02:24:46.516215', 'step': 4216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:24:46.549841', 'step': 4216, 'epoch': 2} {'type': 'loss', 'content': 0.0032047501299530268, 'timestamp': '2025-09-10 02:24:46.563141', 'step': 4217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:46.595764', 'step': 4217, 'epoch': 2} {'type': 'loss', 'content': 0.0013879657490178943, 'timestamp': '2025-09-10 02:24:46.600157', 'step': 4218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 15187581968384}, 'timestamp': '2025-09-10 02:24:46.643464', 'step': 4218, 'epoch': 2} {'type': 'loss', 'content': 0.00234273006208241, 'timestamp': '2025-09-10 02:24:46.661172', 'step': 4219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:24:46.702414', 'step': 4219, 'epoch': 2} {'type': 'loss', 'content': 0.0005795766483061016, 'timestamp': '2025-09-10 02:24:46.739479', 'step': 4220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:46.770239', 'step': 4220, 'epoch': 2} {'type': 'loss', 'content': 0.0004290399665478617, 'timestamp': '2025-09-10 02:24:46.772483', 'step': 4221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:46.803784', 'step': 4221, 'epoch': 2} {'type': 'loss', 'content': 0.017445342615246773, 'timestamp': '2025-09-10 02:24:46.816337', 'step': 4222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:46.847245', 'step': 4222, 'epoch': 2} {'type': 'loss', 'content': 0.0006286733550950885, 'timestamp': '2025-09-10 02:24:46.855283', 'step': 4223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:46.886289', 'step': 4223, 'epoch': 2} {'type': 'loss', 'content': 0.0007370785460807383, 'timestamp': '2025-09-10 02:24:46.918074', 'step': 4224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:46.948655', 'step': 4224, 'epoch': 2} {'type': 'loss', 'content': 0.004575326107442379, 'timestamp': '2025-09-10 02:24:46.957270', 'step': 4225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:46.988637', 'step': 4225, 'epoch': 2} {'type': 'loss', 'content': 0.0020077417138963938, 'timestamp': '2025-09-10 02:24:46.995633', 'step': 4226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:24:47.032864', 'step': 4226, 'epoch': 2} {'type': 'loss', 'content': 0.001171283540315926, 'timestamp': '2025-09-10 02:24:47.046226', 'step': 4227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:47.077882', 'step': 4227, 'epoch': 2} {'type': 'loss', 'content': 0.00035053075407631695, 'timestamp': '2025-09-10 02:24:47.105616', 'step': 4228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:47.136916', 'step': 4228, 'epoch': 2} {'type': 'loss', 'content': 0.0004195565707050264, 'timestamp': '2025-09-10 02:24:47.144823', 'step': 4229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:47.176989', 'step': 4229, 'epoch': 2} {'type': 'loss', 'content': 0.0031623467803001404, 'timestamp': '2025-09-10 02:24:47.183791', 'step': 4230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:24:47.215231', 'step': 4230, 'epoch': 2} {'type': 'loss', 'content': 0.00927420798689127, 'timestamp': '2025-09-10 02:24:47.217461', 'step': 4231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:24:47.256876', 'step': 4231, 'epoch': 2} {'type': 'loss', 'content': 0.00882531888782978, 'timestamp': '2025-09-10 02:24:47.293361', 'step': 4232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:47.324758', 'step': 4232, 'epoch': 2} {'type': 'loss', 'content': 0.0013175641652196646, 'timestamp': '2025-09-10 02:24:47.327064', 'step': 4233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:47.360596', 'step': 4233, 'epoch': 2} {'type': 'loss', 'content': 0.02007768489420414, 'timestamp': '2025-09-10 02:24:47.373186', 'step': 4234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:47.405075', 'step': 4234, 'epoch': 2} {'type': 'loss', 'content': 0.014163470827043056, 'timestamp': '2025-09-10 02:24:47.415328', 'step': 4235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:47.446112', 'step': 4235, 'epoch': 2} {'type': 'loss', 'content': 0.02005593292415142, 'timestamp': '2025-09-10 02:24:47.474100', 'step': 4236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:24:47.510982', 'step': 4236, 'epoch': 2} {'type': 'loss', 'content': 0.0009125882061198354, 'timestamp': '2025-09-10 02:24:47.526175', 'step': 4237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:47.557015', 'step': 4237, 'epoch': 2} {'type': 'loss', 'content': 0.01142832636833191, 'timestamp': '2025-09-10 02:24:47.564834', 'step': 4238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:47.595788', 'step': 4238, 'epoch': 2} {'type': 'loss', 'content': 0.018545877188444138, 'timestamp': '2025-09-10 02:24:47.603091', 'step': 4239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:24:47.637638', 'step': 4239, 'epoch': 2} {'type': 'loss', 'content': 0.0013059125049039721, 'timestamp': '2025-09-10 02:24:47.672262', 'step': 4240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:47.704270', 'step': 4240, 'epoch': 2} {'type': 'loss', 'content': 0.0013210356701165438, 'timestamp': '2025-09-10 02:24:47.708942', 'step': 4241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:47.739973', 'step': 4241, 'epoch': 2} {'type': 'loss', 'content': 0.014471757225692272, 'timestamp': '2025-09-10 02:24:47.746657', 'step': 4242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:47.778557', 'step': 4242, 'epoch': 2} {'type': 'loss', 'content': 0.006982952821999788, 'timestamp': '2025-09-10 02:24:47.791126', 'step': 4243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:47.822714', 'step': 4243, 'epoch': 2} {'type': 'loss', 'content': 0.0017880608793348074, 'timestamp': '2025-09-10 02:24:47.850346', 'step': 4244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:47.880878', 'step': 4244, 'epoch': 2} {'type': 'loss', 'content': 0.005929925944656134, 'timestamp': '2025-09-10 02:24:47.886066', 'step': 4245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:47.917504', 'step': 4245, 'epoch': 2} {'type': 'loss', 'content': 0.004261931870132685, 'timestamp': '2025-09-10 02:24:47.925253', 'step': 4246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:47.955408', 'step': 4246, 'epoch': 2} {'type': 'loss', 'content': 0.0011088837636634707, 'timestamp': '2025-09-10 02:24:47.962565', 'step': 4247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:48.013895', 'step': 4247, 'epoch': 2} {'type': 'loss', 'content': 0.009494653902947903, 'timestamp': '2025-09-10 02:24:48.042624', 'step': 4248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:48.072576', 'step': 4248, 'epoch': 2} {'type': 'loss', 'content': 0.0014409434515982866, 'timestamp': '2025-09-10 02:24:48.077188', 'step': 4249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:48.109097', 'step': 4249, 'epoch': 2} {'type': 'loss', 'content': 0.0007126140990294516, 'timestamp': '2025-09-10 02:24:48.116845', 'step': 4250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 19459015502528}, 'timestamp': '2025-09-10 02:24:48.171651', 'step': 4250, 'epoch': 2} {'type': 'loss', 'content': 0.0010598188964650035, 'timestamp': '2025-09-10 02:24:48.195085', 'step': 4251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:24:48.230399', 'step': 4251, 'epoch': 2} {'type': 'loss', 'content': 0.0014534889487549663, 'timestamp': '2025-09-10 02:24:48.261522', 'step': 4252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:48.293468', 'step': 4252, 'epoch': 2} {'type': 'loss', 'content': 0.0003932244435418397, 'timestamp': '2025-09-10 02:24:48.297886', 'step': 4253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:24:48.336730', 'step': 4253, 'epoch': 2} {'type': 'loss', 'content': 0.0024409524630755186, 'timestamp': '2025-09-10 02:24:48.352588', 'step': 4254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:48.383837', 'step': 4254, 'epoch': 2} {'type': 'loss', 'content': 0.00022083787189330906, 'timestamp': '2025-09-10 02:24:48.390767', 'step': 4255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:48.423234', 'step': 4255, 'epoch': 2} {'type': 'loss', 'content': 0.004927542991936207, 'timestamp': '2025-09-10 02:24:48.451762', 'step': 4256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:48.484907', 'step': 4256, 'epoch': 2} {'type': 'loss', 'content': 0.0016823627520352602, 'timestamp': '2025-09-10 02:24:48.494669', 'step': 4257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:24:48.530118', 'step': 4257, 'epoch': 2} {'type': 'loss', 'content': 0.00010247322643408552, 'timestamp': '2025-09-10 02:24:48.543899', 'step': 4258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:48.575265', 'step': 4258, 'epoch': 2} {'type': 'loss', 'content': 0.0012629638658836484, 'timestamp': '2025-09-10 02:24:48.587583', 'step': 4259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:48.620934', 'step': 4259, 'epoch': 2} {'type': 'loss', 'content': 0.002863981993868947, 'timestamp': '2025-09-10 02:24:48.652075', 'step': 4260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:24:48.685050', 'step': 4260, 'epoch': 2} {'type': 'loss', 'content': 0.00011709488171618432, 'timestamp': '2025-09-10 02:24:48.698047', 'step': 4261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:48.728803', 'step': 4261, 'epoch': 2} {'type': 'loss', 'content': 0.0035543248523026705, 'timestamp': '2025-09-10 02:24:48.736430', 'step': 4262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:24:48.768966', 'step': 4262, 'epoch': 2} {'type': 'loss', 'content': 0.0005866262363269925, 'timestamp': '2025-09-10 02:24:48.773009', 'step': 4263, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:24:58.849819', 'step': 4263, 'epoch': 2} {'type': 'pplx', 'content': 22181812.0487706, 'timestamp': '2025-09-10 02:24:58.852988', 'step': 4263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:58.883995', 'step': 4263, 'epoch': 2} {'type': 'loss', 'content': 0.0003768012975342572, 'timestamp': '2025-09-10 02:24:58.916781', 'step': 4264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:58.950995', 'step': 4264, 'epoch': 2} {'type': 'loss', 'content': 0.006254100706428289, 'timestamp': '2025-09-10 02:24:58.958279', 'step': 4265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:24:58.989706', 'step': 4265, 'epoch': 2} {'type': 'loss', 'content': 0.0002495805674698204, 'timestamp': '2025-09-10 02:24:58.992194', 'step': 4266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:59.024254', 'step': 4266, 'epoch': 2} {'type': 'loss', 'content': 6.783670687582344e-05, 'timestamp': '2025-09-10 02:24:59.030786', 'step': 4267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:59.062701', 'step': 4267, 'epoch': 2} {'type': 'loss', 'content': 0.00031304662115871906, 'timestamp': '2025-09-10 02:24:59.093777', 'step': 4268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:24:59.126070', 'step': 4268, 'epoch': 2} {'type': 'loss', 'content': 0.0011215128470212221, 'timestamp': '2025-09-10 02:24:59.138732', 'step': 4269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:59.171123', 'step': 4269, 'epoch': 2} {'type': 'loss', 'content': 0.00010794185072882101, 'timestamp': '2025-09-10 02:24:59.179029', 'step': 4270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:59.209624', 'step': 4270, 'epoch': 2} {'type': 'loss', 'content': 0.01281669456511736, 'timestamp': '2025-09-10 02:24:59.213646', 'step': 4271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:59.245430', 'step': 4271, 'epoch': 2} {'type': 'loss', 'content': 0.00017699485761113465, 'timestamp': '2025-09-10 02:24:59.273229', 'step': 4272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:24:59.307346', 'step': 4272, 'epoch': 2} {'type': 'loss', 'content': 0.007482998538762331, 'timestamp': '2025-09-10 02:24:59.320324', 'step': 4273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:59.351732', 'step': 4273, 'epoch': 2} {'type': 'loss', 'content': 0.002870141062885523, 'timestamp': '2025-09-10 02:24:59.355889', 'step': 4274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:24:59.386629', 'step': 4274, 'epoch': 2} {'type': 'loss', 'content': 0.00027059766580350697, 'timestamp': '2025-09-10 02:24:59.393689', 'step': 4275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:59.424586', 'step': 4275, 'epoch': 2} {'type': 'loss', 'content': 0.0001303361786995083, 'timestamp': '2025-09-10 02:24:59.455849', 'step': 4276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:24:59.487777', 'step': 4276, 'epoch': 2} {'type': 'loss', 'content': 0.0001203405117848888, 'timestamp': '2025-09-10 02:24:59.492191', 'step': 4277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:24:59.523578', 'step': 4277, 'epoch': 2} {'type': 'loss', 'content': 0.0019344912143424153, 'timestamp': '2025-09-10 02:24:59.531227', 'step': 4278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:24:59.568424', 'step': 4278, 'epoch': 2} {'type': 'loss', 'content': 0.00048476256779395044, 'timestamp': '2025-09-10 02:24:59.582376', 'step': 4279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:24:59.613549', 'step': 4279, 'epoch': 2} {'type': 'loss', 'content': 0.00010040303459390998, 'timestamp': '2025-09-10 02:24:59.641773', 'step': 4280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:24:59.673425', 'step': 4280, 'epoch': 2} {'type': 'loss', 'content': 0.00014088333409745246, 'timestamp': '2025-09-10 02:24:59.675811', 'step': 4281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:59.707021', 'step': 4281, 'epoch': 2} {'type': 'loss', 'content': 0.00018304158584214747, 'timestamp': '2025-09-10 02:24:59.719352', 'step': 4282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:59.751347', 'step': 4282, 'epoch': 2} {'type': 'loss', 'content': 0.0002209401864092797, 'timestamp': '2025-09-10 02:24:59.755552', 'step': 4283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:59.787338', 'step': 4283, 'epoch': 2} {'type': 'loss', 'content': 0.04991947486996651, 'timestamp': '2025-09-10 02:24:59.812420', 'step': 4284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:24:59.843487', 'step': 4284, 'epoch': 2} {'type': 'loss', 'content': 0.0009700055816210806, 'timestamp': '2025-09-10 02:24:59.845657', 'step': 4285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:24:59.877995', 'step': 4285, 'epoch': 2} {'type': 'loss', 'content': 0.0005855086492374539, 'timestamp': '2025-09-10 02:24:59.890131', 'step': 4286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:24:59.921077', 'step': 4286, 'epoch': 2} {'type': 'loss', 'content': 0.032629940658807755, 'timestamp': '2025-09-10 02:24:59.933614', 'step': 4287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:24:59.964627', 'step': 4287, 'epoch': 2} {'type': 'loss', 'content': 8.535251981811598e-05, 'timestamp': '2025-09-10 02:24:59.995921', 'step': 4288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:00.027327', 'step': 4288, 'epoch': 2} {'type': 'loss', 'content': 9.947276703314856e-05, 'timestamp': '2025-09-10 02:25:00.035463', 'step': 4289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:00.071823', 'step': 4289, 'epoch': 2} {'type': 'loss', 'content': 0.00045155364205129445, 'timestamp': '2025-09-10 02:25:00.082136', 'step': 4290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:00.113172', 'step': 4290, 'epoch': 2} {'type': 'loss', 'content': 0.0010928146075457335, 'timestamp': '2025-09-10 02:25:00.123319', 'step': 4291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:00.154266', 'step': 4291, 'epoch': 2} {'type': 'loss', 'content': 0.020004166290163994, 'timestamp': '2025-09-10 02:25:00.182271', 'step': 4292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:00.213350', 'step': 4292, 'epoch': 2} {'type': 'loss', 'content': 0.00102779152803123, 'timestamp': '2025-09-10 02:25:00.218681', 'step': 4293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:00.250280', 'step': 4293, 'epoch': 2} {'type': 'loss', 'content': 0.00010686110181268305, 'timestamp': '2025-09-10 02:25:00.262661', 'step': 4294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:00.292792', 'step': 4294, 'epoch': 2} {'type': 'loss', 'content': 0.00020121461420785636, 'timestamp': '2025-09-10 02:25:00.305178', 'step': 4295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:00.335235', 'step': 4295, 'epoch': 2} {'type': 'loss', 'content': 0.0013605983695015311, 'timestamp': '2025-09-10 02:25:00.363064', 'step': 4296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:00.393824', 'step': 4296, 'epoch': 2} {'type': 'loss', 'content': 0.0013821868924424052, 'timestamp': '2025-09-10 02:25:00.398980', 'step': 4297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:00.430139', 'step': 4297, 'epoch': 2} {'type': 'loss', 'content': 3.0794344638707116e-05, 'timestamp': '2025-09-10 02:25:00.437218', 'step': 4298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:00.468044', 'step': 4298, 'epoch': 2} {'type': 'loss', 'content': 0.004364358726888895, 'timestamp': '2025-09-10 02:25:00.475072', 'step': 4299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:00.506555', 'step': 4299, 'epoch': 2} {'type': 'loss', 'content': 0.0013004717184230685, 'timestamp': '2025-09-10 02:25:00.534297', 'step': 4300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:00.564881', 'step': 4300, 'epoch': 2} {'type': 'loss', 'content': 0.002206193981692195, 'timestamp': '2025-09-10 02:25:00.572088', 'step': 4301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:00.603449', 'step': 4301, 'epoch': 2} {'type': 'loss', 'content': 5.780989522463642e-05, 'timestamp': '2025-09-10 02:25:00.610376', 'step': 4302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:00.641640', 'step': 4302, 'epoch': 2} {'type': 'loss', 'content': 0.0021686165127903223, 'timestamp': '2025-09-10 02:25:00.652197', 'step': 4303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:00.685700', 'step': 4303, 'epoch': 2} {'type': 'loss', 'content': 0.0004125793057028204, 'timestamp': '2025-09-10 02:25:00.718939', 'step': 4304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:00.753236', 'step': 4304, 'epoch': 2} {'type': 'loss', 'content': 0.04362964630126953, 'timestamp': '2025-09-10 02:25:00.758456', 'step': 4305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:00.791572', 'step': 4305, 'epoch': 2} {'type': 'loss', 'content': 0.00016738659178372473, 'timestamp': '2025-09-10 02:25:00.795703', 'step': 4306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:00.826855', 'step': 4306, 'epoch': 2} {'type': 'loss', 'content': 0.00039224643842317164, 'timestamp': '2025-09-10 02:25:00.833827', 'step': 4307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:00.864064', 'step': 4307, 'epoch': 2} {'type': 'loss', 'content': 0.0002121599536621943, 'timestamp': '2025-09-10 02:25:00.889610', 'step': 4308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:00.920451', 'step': 4308, 'epoch': 2} {'type': 'loss', 'content': 0.0006208484992384911, 'timestamp': '2025-09-10 02:25:00.926077', 'step': 4309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:00.957605', 'step': 4309, 'epoch': 2} {'type': 'loss', 'content': 0.0001874407462310046, 'timestamp': '2025-09-10 02:25:00.967556', 'step': 4310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:00.998813', 'step': 4310, 'epoch': 2} {'type': 'loss', 'content': 3.587496030377224e-05, 'timestamp': '2025-09-10 02:25:01.006283', 'step': 4311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:01.037457', 'step': 4311, 'epoch': 2} {'type': 'loss', 'content': 0.001078314846381545, 'timestamp': '2025-09-10 02:25:01.068721', 'step': 4312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:01.100766', 'step': 4312, 'epoch': 2} {'type': 'loss', 'content': 0.00012618736946024, 'timestamp': '2025-09-10 02:25:01.105848', 'step': 4313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:25:01.141178', 'step': 4313, 'epoch': 2} {'type': 'loss', 'content': 0.020358415320515633, 'timestamp': '2025-09-10 02:25:01.154952', 'step': 4314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:01.189177', 'step': 4314, 'epoch': 2} {'type': 'loss', 'content': 0.0020916808862239122, 'timestamp': '2025-09-10 02:25:01.202463', 'step': 4315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:01.233436', 'step': 4315, 'epoch': 2} {'type': 'loss', 'content': 0.00011025350977433845, 'timestamp': '2025-09-10 02:25:01.261997', 'step': 4316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:01.294453', 'step': 4316, 'epoch': 2} {'type': 'loss', 'content': 0.00029134147916920483, 'timestamp': '2025-09-10 02:25:01.303664', 'step': 4317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:01.334769', 'step': 4317, 'epoch': 2} {'type': 'loss', 'content': 0.00012424368469510227, 'timestamp': '2025-09-10 02:25:01.341911', 'step': 4318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:01.373280', 'step': 4318, 'epoch': 2} {'type': 'loss', 'content': 9.490887896390632e-05, 'timestamp': '2025-09-10 02:25:01.381056', 'step': 4319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:01.412783', 'step': 4319, 'epoch': 2} {'type': 'loss', 'content': 0.00033820615499280393, 'timestamp': '2025-09-10 02:25:01.440434', 'step': 4320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:01.472346', 'step': 4320, 'epoch': 2} {'type': 'loss', 'content': 0.0010137018980458379, 'timestamp': '2025-09-10 02:25:01.479957', 'step': 4321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:01.512010', 'step': 4321, 'epoch': 2} {'type': 'loss', 'content': 0.0029211000073701143, 'timestamp': '2025-09-10 02:25:01.519422', 'step': 4322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:01.553581', 'step': 4322, 'epoch': 2} {'type': 'loss', 'content': 0.0003600471536628902, 'timestamp': '2025-09-10 02:25:01.557064', 'step': 4323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:25:01.595108', 'step': 4323, 'epoch': 2} {'type': 'loss', 'content': 0.0003258692449890077, 'timestamp': '2025-09-10 02:25:01.628523', 'step': 4324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:01.665460', 'step': 4324, 'epoch': 2} {'type': 'loss', 'content': 0.00018436498066876084, 'timestamp': '2025-09-10 02:25:01.669802', 'step': 4325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:01.706002', 'step': 4325, 'epoch': 2} {'type': 'loss', 'content': 0.00016462701023556292, 'timestamp': '2025-09-10 02:25:01.712863', 'step': 4326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:01.751547', 'step': 4326, 'epoch': 2} {'type': 'loss', 'content': 0.00015942190657369792, 'timestamp': '2025-09-10 02:25:01.758328', 'step': 4327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:01.797714', 'step': 4327, 'epoch': 2} {'type': 'loss', 'content': 0.0009382445714436471, 'timestamp': '2025-09-10 02:25:01.821965', 'step': 4328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:01.863480', 'step': 4328, 'epoch': 2} {'type': 'loss', 'content': 0.00022544125386048108, 'timestamp': '2025-09-10 02:25:01.873011', 'step': 4329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:25:01.907683', 'step': 4329, 'epoch': 2} {'type': 'loss', 'content': 0.00011031327449018136, 'timestamp': '2025-09-10 02:25:01.911985', 'step': 4330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:25:01.953697', 'step': 4330, 'epoch': 2} {'type': 'loss', 'content': 0.014851606450974941, 'timestamp': '2025-09-10 02:25:01.967684', 'step': 4331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:02.006933', 'step': 4331, 'epoch': 2} {'type': 'loss', 'content': 7.220734551083297e-05, 'timestamp': '2025-09-10 02:25:02.032416', 'step': 4332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:02.065054', 'step': 4332, 'epoch': 2} {'type': 'loss', 'content': 0.00027742632664740086, 'timestamp': '2025-09-10 02:25:02.070467', 'step': 4333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:25:02.110243', 'step': 4333, 'epoch': 2} {'type': 'loss', 'content': 0.00022008584346622229, 'timestamp': '2025-09-10 02:25:02.126339', 'step': 4334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:02.157983', 'step': 4334, 'epoch': 2} {'type': 'loss', 'content': 0.02434348128736019, 'timestamp': '2025-09-10 02:25:02.165668', 'step': 4335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:02.198239', 'step': 4335, 'epoch': 2} {'type': 'loss', 'content': 6.505424244096503e-05, 'timestamp': '2025-09-10 02:25:02.229114', 'step': 4336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:02.260400', 'step': 4336, 'epoch': 2} {'type': 'loss', 'content': 0.0028443282935768366, 'timestamp': '2025-09-10 02:25:02.265722', 'step': 4337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:02.297700', 'step': 4337, 'epoch': 2} {'type': 'loss', 'content': 0.021456856280565262, 'timestamp': '2025-09-10 02:25:02.305252', 'step': 4338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:02.336131', 'step': 4338, 'epoch': 2} {'type': 'loss', 'content': 0.00022894078574609011, 'timestamp': '2025-09-10 02:25:02.338752', 'step': 4339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 13289167064320}, 'timestamp': '2025-09-10 02:25:02.381660', 'step': 4339, 'epoch': 2} {'type': 'loss', 'content': 0.0002486018347553909, 'timestamp': '2025-09-10 02:25:02.418933', 'step': 4340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:02.449597', 'step': 4340, 'epoch': 2} {'type': 'loss', 'content': 0.008721557445824146, 'timestamp': '2025-09-10 02:25:02.454124', 'step': 4341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:02.484784', 'step': 4341, 'epoch': 2} {'type': 'loss', 'content': 0.025644836947321892, 'timestamp': '2025-09-10 02:25:02.492080', 'step': 4342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:02.526057', 'step': 4342, 'epoch': 2} {'type': 'loss', 'content': 0.002075839089229703, 'timestamp': '2025-09-10 02:25:02.530195', 'step': 4343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:02.561412', 'step': 4343, 'epoch': 2} {'type': 'loss', 'content': 0.0144526781514287, 'timestamp': '2025-09-10 02:25:02.588953', 'step': 4344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:02.619914', 'step': 4344, 'epoch': 2} {'type': 'loss', 'content': 0.009567998349666595, 'timestamp': '2025-09-10 02:25:02.624449', 'step': 4345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:02.658395', 'step': 4345, 'epoch': 2} {'type': 'loss', 'content': 0.0002823833783622831, 'timestamp': '2025-09-10 02:25:02.665186', 'step': 4346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:02.696805', 'step': 4346, 'epoch': 2} {'type': 'loss', 'content': 0.005126704927533865, 'timestamp': '2025-09-10 02:25:02.706668', 'step': 4347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:02.737506', 'step': 4347, 'epoch': 2} {'type': 'loss', 'content': 6.73050744808279e-05, 'timestamp': '2025-09-10 02:25:02.765447', 'step': 4348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:25:02.797360', 'step': 4348, 'epoch': 2} {'type': 'loss', 'content': 0.0014881890965625644, 'timestamp': '2025-09-10 02:25:02.807494', 'step': 4349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:02.838380', 'step': 4349, 'epoch': 2} {'type': 'loss', 'content': 0.0008507216116413474, 'timestamp': '2025-09-10 02:25:02.842906', 'step': 4350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:02.877739', 'step': 4350, 'epoch': 2} {'type': 'loss', 'content': 0.00017134180234279484, 'timestamp': '2025-09-10 02:25:02.884535', 'step': 4351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:02.915481', 'step': 4351, 'epoch': 2} {'type': 'loss', 'content': 0.008143614046275616, 'timestamp': '2025-09-10 02:25:02.943789', 'step': 4352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:02.974733', 'step': 4352, 'epoch': 2} {'type': 'loss', 'content': 0.005178903229534626, 'timestamp': '2025-09-10 02:25:02.977062', 'step': 4353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:03.010208', 'step': 4353, 'epoch': 2} {'type': 'loss', 'content': 0.0012402004795148969, 'timestamp': '2025-09-10 02:25:03.018067', 'step': 4354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:03.048968', 'step': 4354, 'epoch': 2} {'type': 'loss', 'content': 0.00033361284295096993, 'timestamp': '2025-09-10 02:25:03.059247', 'step': 4355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:03.096653', 'step': 4355, 'epoch': 2} {'type': 'loss', 'content': 9.43372942856513e-05, 'timestamp': '2025-09-10 02:25:03.122014', 'step': 4356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:03.152986', 'step': 4356, 'epoch': 2} {'type': 'loss', 'content': 0.010012110695242882, 'timestamp': '2025-09-10 02:25:03.162705', 'step': 4357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:25:03.194345', 'step': 4357, 'epoch': 2} {'type': 'loss', 'content': 0.0008857456268742681, 'timestamp': '2025-09-10 02:25:03.206853', 'step': 4358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:03.238672', 'step': 4358, 'epoch': 2} {'type': 'loss', 'content': 0.0003111858095508069, 'timestamp': '2025-09-10 02:25:03.242498', 'step': 4359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:03.273121', 'step': 4359, 'epoch': 2} {'type': 'loss', 'content': 0.0002316091413376853, 'timestamp': '2025-09-10 02:25:03.298351', 'step': 4360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:03.329540', 'step': 4360, 'epoch': 2} {'type': 'loss', 'content': 0.009092413820326328, 'timestamp': '2025-09-10 02:25:03.334165', 'step': 4361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:25:03.365369', 'step': 4361, 'epoch': 2} {'type': 'loss', 'content': 0.00018735427875071764, 'timestamp': '2025-09-10 02:25:03.377920', 'step': 4362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:03.409909', 'step': 4362, 'epoch': 2} {'type': 'loss', 'content': 0.026521209627389908, 'timestamp': '2025-09-10 02:25:03.416748', 'step': 4363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:03.447157', 'step': 4363, 'epoch': 2} {'type': 'loss', 'content': 0.000702383928000927, 'timestamp': '2025-09-10 02:25:03.472725', 'step': 4364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:03.504432', 'step': 4364, 'epoch': 2} {'type': 'loss', 'content': 0.02729278802871704, 'timestamp': '2025-09-10 02:25:03.513002', 'step': 4365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:03.544765', 'step': 4365, 'epoch': 2} {'type': 'loss', 'content': 0.0004027434333693236, 'timestamp': '2025-09-10 02:25:03.554960', 'step': 4366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:03.585808', 'step': 4366, 'epoch': 2} {'type': 'loss', 'content': 0.002170759718865156, 'timestamp': '2025-09-10 02:25:03.593496', 'step': 4367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:25:03.626118', 'step': 4367, 'epoch': 2} {'type': 'loss', 'content': 0.00022459625324700028, 'timestamp': '2025-09-10 02:25:03.650096', 'step': 4368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:03.682870', 'step': 4368, 'epoch': 2} {'type': 'loss', 'content': 0.0006497269496321678, 'timestamp': '2025-09-10 02:25:03.686874', 'step': 4369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:03.718865', 'step': 4369, 'epoch': 2} {'type': 'loss', 'content': 0.004197689704596996, 'timestamp': '2025-09-10 02:25:03.725576', 'step': 4370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:03.757710', 'step': 4370, 'epoch': 2} {'type': 'loss', 'content': 0.021389422938227654, 'timestamp': '2025-09-10 02:25:03.760473', 'step': 4371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:03.791399', 'step': 4371, 'epoch': 2} {'type': 'loss', 'content': 0.0005672698607668281, 'timestamp': '2025-09-10 02:25:03.816643', 'step': 4372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:03.848327', 'step': 4372, 'epoch': 2} {'type': 'loss', 'content': 0.001765951863490045, 'timestamp': '2025-09-10 02:25:03.853585', 'step': 4373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:03.884873', 'step': 4373, 'epoch': 2} {'type': 'loss', 'content': 0.00043063057819381356, 'timestamp': '2025-09-10 02:25:03.889252', 'step': 4374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:03.921343', 'step': 4374, 'epoch': 2} {'type': 'loss', 'content': 0.00024490643409080803, 'timestamp': '2025-09-10 02:25:03.929220', 'step': 4375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:03.960344', 'step': 4375, 'epoch': 2} {'type': 'loss', 'content': 0.0008961300482042134, 'timestamp': '2025-09-10 02:25:03.985581', 'step': 4376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:04.017214', 'step': 4376, 'epoch': 2} {'type': 'loss', 'content': 0.05845966935157776, 'timestamp': '2025-09-10 02:25:04.019595', 'step': 4377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:04.050407', 'step': 4377, 'epoch': 2} {'type': 'loss', 'content': 0.025336632505059242, 'timestamp': '2025-09-10 02:25:04.061385', 'step': 4378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:25:04.100938', 'step': 4378, 'epoch': 2} {'type': 'loss', 'content': 0.010996916331350803, 'timestamp': '2025-09-10 02:25:04.114689', 'step': 4379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:25:04.149851', 'step': 4379, 'epoch': 2} {'type': 'loss', 'content': 0.00864367000758648, 'timestamp': '2025-09-10 02:25:04.184450', 'step': 4380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:04.215617', 'step': 4380, 'epoch': 2} {'type': 'loss', 'content': 9.313374903285876e-05, 'timestamp': '2025-09-10 02:25:04.220604', 'step': 4381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:25:04.254668', 'step': 4381, 'epoch': 2} {'type': 'loss', 'content': 0.00039597839349880815, 'timestamp': '2025-09-10 02:25:04.268392', 'step': 4382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:04.308879', 'step': 4382, 'epoch': 2} {'type': 'loss', 'content': 0.0007557374192401767, 'timestamp': '2025-09-10 02:25:04.316489', 'step': 4383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:04.356029', 'step': 4383, 'epoch': 2} {'type': 'loss', 'content': 0.00034854214754886925, 'timestamp': '2025-09-10 02:25:04.384274', 'step': 4384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:04.416262', 'step': 4384, 'epoch': 2} {'type': 'loss', 'content': 0.0001689386263024062, 'timestamp': '2025-09-10 02:25:04.420840', 'step': 4385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:04.451403', 'step': 4385, 'epoch': 2} {'type': 'loss', 'content': 0.00041699831490404904, 'timestamp': '2025-09-10 02:25:04.455584', 'step': 4386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:04.486246', 'step': 4386, 'epoch': 2} {'type': 'loss', 'content': 0.07199867814779282, 'timestamp': '2025-09-10 02:25:04.489002', 'step': 4387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:04.520589', 'step': 4387, 'epoch': 2} {'type': 'loss', 'content': 0.04156893119215965, 'timestamp': '2025-09-10 02:25:04.549335', 'step': 4388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:04.581471', 'step': 4388, 'epoch': 2} {'type': 'loss', 'content': 0.0010044872760772705, 'timestamp': '2025-09-10 02:25:04.594236', 'step': 4389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:04.627227', 'step': 4389, 'epoch': 2} {'type': 'loss', 'content': 0.009525059722363949, 'timestamp': '2025-09-10 02:25:04.634148', 'step': 4390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:25:04.666518', 'step': 4390, 'epoch': 2} {'type': 'loss', 'content': 0.010987967252731323, 'timestamp': '2025-09-10 02:25:04.677857', 'step': 4391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:04.710710', 'step': 4391, 'epoch': 2} {'type': 'loss', 'content': 0.00037763340515084565, 'timestamp': '2025-09-10 02:25:04.738121', 'step': 4392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 15662185694400}, 'timestamp': '2025-09-10 02:25:04.782454', 'step': 4392, 'epoch': 2} {'type': 'loss', 'content': 0.009227042086422443, 'timestamp': '2025-09-10 02:25:04.801406', 'step': 4393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:04.836946', 'step': 4393, 'epoch': 2} {'type': 'loss', 'content': 0.0056563569232821465, 'timestamp': '2025-09-10 02:25:04.843328', 'step': 4394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:04.878055', 'step': 4394, 'epoch': 2} {'type': 'loss', 'content': 0.002920966362580657, 'timestamp': '2025-09-10 02:25:04.891381', 'step': 4395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:04.923937', 'step': 4395, 'epoch': 2} {'type': 'loss', 'content': 0.0005617746501229703, 'timestamp': '2025-09-10 02:25:04.951134', 'step': 4396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:04.985495', 'step': 4396, 'epoch': 2} {'type': 'loss', 'content': 0.0088628139346838, 'timestamp': '2025-09-10 02:25:04.988524', 'step': 4397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:05.022980', 'step': 4397, 'epoch': 2} {'type': 'loss', 'content': 0.002569663105532527, 'timestamp': '2025-09-10 02:25:05.028624', 'step': 4398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:25:05.064404', 'step': 4398, 'epoch': 2} {'type': 'loss', 'content': 0.004381218459457159, 'timestamp': '2025-09-10 02:25:05.078266', 'step': 4399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:05.110148', 'step': 4399, 'epoch': 2} {'type': 'loss', 'content': 0.00011449779412942007, 'timestamp': '2025-09-10 02:25:05.137669', 'step': 4400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:25:05.175220', 'step': 4400, 'epoch': 2} {'type': 'loss', 'content': 0.0009381847339682281, 'timestamp': '2025-09-10 02:25:05.190670', 'step': 4401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:05.225844', 'step': 4401, 'epoch': 2} {'type': 'loss', 'content': 0.003086991375312209, 'timestamp': '2025-09-10 02:25:05.239188', 'step': 4402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:05.270382', 'step': 4402, 'epoch': 2} {'type': 'loss', 'content': 0.0008446700521744788, 'timestamp': '2025-09-10 02:25:05.274812', 'step': 4403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:05.306126', 'step': 4403, 'epoch': 2} {'type': 'loss', 'content': 0.012834797613322735, 'timestamp': '2025-09-10 02:25:05.331405', 'step': 4404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:05.362930', 'step': 4404, 'epoch': 2} {'type': 'loss', 'content': 0.002319957595318556, 'timestamp': '2025-09-10 02:25:05.370483', 'step': 4405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:05.402303', 'step': 4405, 'epoch': 2} {'type': 'loss', 'content': 0.021244987845420837, 'timestamp': '2025-09-10 02:25:05.406316', 'step': 4406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:05.438704', 'step': 4406, 'epoch': 2} {'type': 'loss', 'content': 0.0019213539781048894, 'timestamp': '2025-09-10 02:25:05.446034', 'step': 4407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:05.479814', 'step': 4407, 'epoch': 2} {'type': 'loss', 'content': 0.02890808694064617, 'timestamp': '2025-09-10 02:25:05.514178', 'step': 4408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:05.546653', 'step': 4408, 'epoch': 2} {'type': 'loss', 'content': 0.007281397935003042, 'timestamp': '2025-09-10 02:25:05.551380', 'step': 4409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:05.583266', 'step': 4409, 'epoch': 2} {'type': 'loss', 'content': 0.00019025967048946768, 'timestamp': '2025-09-10 02:25:05.587500', 'step': 4410, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:25:15.850616', 'step': 4410, 'epoch': 2} {'type': 'pplx', 'content': 20370479.949023202, 'timestamp': '2025-09-10 02:25:15.854937', 'step': 4410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:15.885941', 'step': 4410, 'epoch': 2} {'type': 'loss', 'content': 0.0011351705761626363, 'timestamp': '2025-09-10 02:25:15.895670', 'step': 4411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:15.930248', 'step': 4411, 'epoch': 2} {'type': 'loss', 'content': 0.0018093172693625093, 'timestamp': '2025-09-10 02:25:15.953841', 'step': 4412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:15.985315', 'step': 4412, 'epoch': 2} {'type': 'loss', 'content': 0.025083277374505997, 'timestamp': '2025-09-10 02:25:15.989623', 'step': 4413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:16.019818', 'step': 4413, 'epoch': 2} {'type': 'loss', 'content': 0.0019721267744898796, 'timestamp': '2025-09-10 02:25:16.032036', 'step': 4414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:16.061635', 'step': 4414, 'epoch': 2} {'type': 'loss', 'content': 0.0004431180714163929, 'timestamp': '2025-09-10 02:25:16.071615', 'step': 4415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:16.102136', 'step': 4415, 'epoch': 2} {'type': 'loss', 'content': 0.006176200695335865, 'timestamp': '2025-09-10 02:25:16.133398', 'step': 4416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:16.163904', 'step': 4416, 'epoch': 2} {'type': 'loss', 'content': 0.0014073234051465988, 'timestamp': '2025-09-10 02:25:16.176495', 'step': 4417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:16.208014', 'step': 4417, 'epoch': 2} {'type': 'loss', 'content': 0.0057088471949100494, 'timestamp': '2025-09-10 02:25:16.220069', 'step': 4418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:16.250957', 'step': 4418, 'epoch': 2} {'type': 'loss', 'content': 0.0012675122125074267, 'timestamp': '2025-09-10 02:25:16.257909', 'step': 4419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:16.288247', 'step': 4419, 'epoch': 2} {'type': 'loss', 'content': 0.01437693927437067, 'timestamp': '2025-09-10 02:25:16.316874', 'step': 4420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:16.347430', 'step': 4420, 'epoch': 2} {'type': 'loss', 'content': 0.0025128854904323816, 'timestamp': '2025-09-10 02:25:16.352338', 'step': 4421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:16.382941', 'step': 4421, 'epoch': 2} {'type': 'loss', 'content': 0.003963653929531574, 'timestamp': '2025-09-10 02:25:16.385672', 'step': 4422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:16.416172', 'step': 4422, 'epoch': 2} {'type': 'loss', 'content': 0.020399346947669983, 'timestamp': '2025-09-10 02:25:16.423691', 'step': 4423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:16.453886', 'step': 4423, 'epoch': 2} {'type': 'loss', 'content': 0.03138017654418945, 'timestamp': '2025-09-10 02:25:16.487035', 'step': 4424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:16.517208', 'step': 4424, 'epoch': 2} {'type': 'loss', 'content': 0.0019118200289085507, 'timestamp': '2025-09-10 02:25:16.526815', 'step': 4425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:16.557895', 'step': 4425, 'epoch': 2} {'type': 'loss', 'content': 0.005137981381267309, 'timestamp': '2025-09-10 02:25:16.565386', 'step': 4426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:16.596353', 'step': 4426, 'epoch': 2} {'type': 'loss', 'content': 0.001418622094206512, 'timestamp': '2025-09-10 02:25:16.607299', 'step': 4427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:16.638091', 'step': 4427, 'epoch': 2} {'type': 'loss', 'content': 0.0003645730612333864, 'timestamp': '2025-09-10 02:25:16.666574', 'step': 4428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:16.696492', 'step': 4428, 'epoch': 2} {'type': 'loss', 'content': 0.00549284229055047, 'timestamp': '2025-09-10 02:25:16.701706', 'step': 4429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:16.732318', 'step': 4429, 'epoch': 2} {'type': 'loss', 'content': 0.04728490859270096, 'timestamp': '2025-09-10 02:25:16.739376', 'step': 4430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:16.769497', 'step': 4430, 'epoch': 2} {'type': 'loss', 'content': 0.001036238856613636, 'timestamp': '2025-09-10 02:25:16.781689', 'step': 4431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:25:16.816547', 'step': 4431, 'epoch': 2} {'type': 'loss', 'content': 0.009465152397751808, 'timestamp': '2025-09-10 02:25:16.851174', 'step': 4432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:25:16.881588', 'step': 4432, 'epoch': 2} {'type': 'loss', 'content': 0.012627379037439823, 'timestamp': '2025-09-10 02:25:16.883722', 'step': 4433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:25:16.913932', 'step': 4433, 'epoch': 2} {'type': 'loss', 'content': 0.00041100315866060555, 'timestamp': '2025-09-10 02:25:16.916237', 'step': 4434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:16.945630', 'step': 4434, 'epoch': 2} {'type': 'loss', 'content': 0.0013999169459566474, 'timestamp': '2025-09-10 02:25:16.953387', 'step': 4435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:16.983926', 'step': 4435, 'epoch': 2} {'type': 'loss', 'content': 0.02100582979619503, 'timestamp': '2025-09-10 02:25:17.012694', 'step': 4436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 15187581968384}, 'timestamp': '2025-09-10 02:25:17.053341', 'step': 4436, 'epoch': 2} {'type': 'loss', 'content': 0.001676683546975255, 'timestamp': '2025-09-10 02:25:17.070677', 'step': 4437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:17.101443', 'step': 4437, 'epoch': 2} {'type': 'loss', 'content': 0.0023947085719555616, 'timestamp': '2025-09-10 02:25:17.109357', 'step': 4438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:17.140639', 'step': 4438, 'epoch': 2} {'type': 'loss', 'content': 0.027537260204553604, 'timestamp': '2025-09-10 02:25:17.147759', 'step': 4439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:17.177732', 'step': 4439, 'epoch': 2} {'type': 'loss', 'content': 0.008474222384393215, 'timestamp': '2025-09-10 02:25:17.210732', 'step': 4440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:17.240867', 'step': 4440, 'epoch': 2} {'type': 'loss', 'content': 0.005611742846667767, 'timestamp': '2025-09-10 02:25:17.243416', 'step': 4441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:25:17.281556', 'step': 4441, 'epoch': 2} {'type': 'loss', 'content': 0.0019225550349801779, 'timestamp': '2025-09-10 02:25:17.297179', 'step': 4442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:17.329927', 'step': 4442, 'epoch': 2} {'type': 'loss', 'content': 0.001591675216332078, 'timestamp': '2025-09-10 02:25:17.336903', 'step': 4443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:17.367487', 'step': 4443, 'epoch': 2} {'type': 'loss', 'content': 0.0011540406849235296, 'timestamp': '2025-09-10 02:25:17.395419', 'step': 4444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:25:17.431151', 'step': 4444, 'epoch': 2} {'type': 'loss', 'content': 0.0016605369746685028, 'timestamp': '2025-09-10 02:25:17.444438', 'step': 4445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:17.474779', 'step': 4445, 'epoch': 2} {'type': 'loss', 'content': 0.033832911401987076, 'timestamp': '2025-09-10 02:25:17.477354', 'step': 4446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:25:17.507953', 'step': 4446, 'epoch': 2} {'type': 'loss', 'content': 0.02437257580459118, 'timestamp': '2025-09-10 02:25:17.520474', 'step': 4447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:25:17.557930', 'step': 4447, 'epoch': 2} {'type': 'loss', 'content': 0.0006624148809351027, 'timestamp': '2025-09-10 02:25:17.594424', 'step': 4448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:17.626626', 'step': 4448, 'epoch': 2} {'type': 'loss', 'content': 0.005153920501470566, 'timestamp': '2025-09-10 02:25:17.628614', 'step': 4449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:25:17.666898', 'step': 4449, 'epoch': 2} {'type': 'loss', 'content': 0.025530895218253136, 'timestamp': '2025-09-10 02:25:17.682503', 'step': 4450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:17.714804', 'step': 4450, 'epoch': 2} {'type': 'loss', 'content': 0.00903196632862091, 'timestamp': '2025-09-10 02:25:17.722651', 'step': 4451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:17.756406', 'step': 4451, 'epoch': 2} {'type': 'loss', 'content': 0.018469911068677902, 'timestamp': '2025-09-10 02:25:17.788252', 'step': 4452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:17.823443', 'step': 4452, 'epoch': 2} {'type': 'loss', 'content': 0.00648995628580451, 'timestamp': '2025-09-10 02:25:17.828496', 'step': 4453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:17.859668', 'step': 4453, 'epoch': 2} {'type': 'loss', 'content': 0.0027806435246020555, 'timestamp': '2025-09-10 02:25:17.869766', 'step': 4454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:17.899869', 'step': 4454, 'epoch': 2} {'type': 'loss', 'content': 0.0024515967816114426, 'timestamp': '2025-09-10 02:25:17.906768', 'step': 4455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:17.937126', 'step': 4455, 'epoch': 2} {'type': 'loss', 'content': 0.0002551526122260839, 'timestamp': '2025-09-10 02:25:17.969578', 'step': 4456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:17.999872', 'step': 4456, 'epoch': 2} {'type': 'loss', 'content': 0.003521733218804002, 'timestamp': '2025-09-10 02:25:18.008432', 'step': 4457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:18.038618', 'step': 4457, 'epoch': 2} {'type': 'loss', 'content': 0.0038238188717514277, 'timestamp': '2025-09-10 02:25:18.041246', 'step': 4458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:18.071557', 'step': 4458, 'epoch': 2} {'type': 'loss', 'content': 0.001157488557510078, 'timestamp': '2025-09-10 02:25:18.076059', 'step': 4459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:18.106247', 'step': 4459, 'epoch': 2} {'type': 'loss', 'content': 0.006657246965914965, 'timestamp': '2025-09-10 02:25:18.136622', 'step': 4460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:18.169556', 'step': 4460, 'epoch': 2} {'type': 'loss', 'content': 0.0043445127084851265, 'timestamp': '2025-09-10 02:25:18.177946', 'step': 4461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:18.212263', 'step': 4461, 'epoch': 2} {'type': 'loss', 'content': 0.006325058173388243, 'timestamp': '2025-09-10 02:25:18.225631', 'step': 4462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:18.255544', 'step': 4462, 'epoch': 2} {'type': 'loss', 'content': 0.031205790117383003, 'timestamp': '2025-09-10 02:25:18.260132', 'step': 4463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:18.293387', 'step': 4463, 'epoch': 2} {'type': 'loss', 'content': 0.006542644929140806, 'timestamp': '2025-09-10 02:25:18.327670', 'step': 4464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:18.358221', 'step': 4464, 'epoch': 2} {'type': 'loss', 'content': 0.0034815913531929255, 'timestamp': '2025-09-10 02:25:18.360567', 'step': 4465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:18.393096', 'step': 4465, 'epoch': 2} {'type': 'loss', 'content': 0.0044549135491251945, 'timestamp': '2025-09-10 02:25:18.403353', 'step': 4466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:18.435701', 'step': 4466, 'epoch': 2} {'type': 'loss', 'content': 0.007206571288406849, 'timestamp': '2025-09-10 02:25:18.446597', 'step': 4467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:18.479836', 'step': 4467, 'epoch': 2} {'type': 'loss', 'content': 0.0019098568009212613, 'timestamp': '2025-09-10 02:25:18.514083', 'step': 4468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:18.545584', 'step': 4468, 'epoch': 2} {'type': 'loss', 'content': 0.009369590319693089, 'timestamp': '2025-09-10 02:25:18.553335', 'step': 4469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:18.585127', 'step': 4469, 'epoch': 2} {'type': 'loss', 'content': 0.004385852254927158, 'timestamp': '2025-09-10 02:25:18.592851', 'step': 4470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:18.624213', 'step': 4470, 'epoch': 2} {'type': 'loss', 'content': 0.0033103374298661947, 'timestamp': '2025-09-10 02:25:18.631120', 'step': 4471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:18.663428', 'step': 4471, 'epoch': 2} {'type': 'loss', 'content': 0.023561924695968628, 'timestamp': '2025-09-10 02:25:18.691155', 'step': 4472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:18.722928', 'step': 4472, 'epoch': 2} {'type': 'loss', 'content': 0.012181418016552925, 'timestamp': '2025-09-10 02:25:18.728189', 'step': 4473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:18.765187', 'step': 4473, 'epoch': 2} {'type': 'loss', 'content': 0.001010423176921904, 'timestamp': '2025-09-10 02:25:18.772216', 'step': 4474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:18.805784', 'step': 4474, 'epoch': 2} {'type': 'loss', 'content': 0.013591425493359566, 'timestamp': '2025-09-10 02:25:18.812254', 'step': 4475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:25:18.849404', 'step': 4475, 'epoch': 2} {'type': 'loss', 'content': 0.007945683784782887, 'timestamp': '2025-09-10 02:25:18.873116', 'step': 4476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:18.903332', 'step': 4476, 'epoch': 2} {'type': 'loss', 'content': 0.010526351630687714, 'timestamp': '2025-09-10 02:25:18.913240', 'step': 4477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:18.950587', 'step': 4477, 'epoch': 2} {'type': 'loss', 'content': 0.0060272216796875, 'timestamp': '2025-09-10 02:25:18.954742', 'step': 4478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:18.987285', 'step': 4478, 'epoch': 2} {'type': 'loss', 'content': 0.010436930693686008, 'timestamp': '2025-09-10 02:25:18.994660', 'step': 4479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:19.025145', 'step': 4479, 'epoch': 2} {'type': 'loss', 'content': 0.002587387105450034, 'timestamp': '2025-09-10 02:25:19.056094', 'step': 4480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:19.086029', 'step': 4480, 'epoch': 2} {'type': 'loss', 'content': 0.002534937346354127, 'timestamp': '2025-09-10 02:25:19.090628', 'step': 4481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:19.123598', 'step': 4481, 'epoch': 2} {'type': 'loss', 'content': 0.03924372047185898, 'timestamp': '2025-09-10 02:25:19.134548', 'step': 4482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:19.166117', 'step': 4482, 'epoch': 2} {'type': 'loss', 'content': 0.0028582927770912647, 'timestamp': '2025-09-10 02:25:19.173122', 'step': 4483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:19.205766', 'step': 4483, 'epoch': 2} {'type': 'loss', 'content': 0.0013124326942488551, 'timestamp': '2025-09-10 02:25:19.231252', 'step': 4484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:19.262019', 'step': 4484, 'epoch': 2} {'type': 'loss', 'content': 0.000659207405988127, 'timestamp': '2025-09-10 02:25:19.269985', 'step': 4485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:19.299946', 'step': 4485, 'epoch': 2} {'type': 'loss', 'content': 0.0035707519855350256, 'timestamp': '2025-09-10 02:25:19.302674', 'step': 4486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:19.333939', 'step': 4486, 'epoch': 2} {'type': 'loss', 'content': 0.003890756983309984, 'timestamp': '2025-09-10 02:25:19.338618', 'step': 4487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:19.367790', 'step': 4487, 'epoch': 2} {'type': 'loss', 'content': 0.003235712181776762, 'timestamp': '2025-09-10 02:25:19.391384', 'step': 4488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:19.421271', 'step': 4488, 'epoch': 2} {'type': 'loss', 'content': 0.0025943939108401537, 'timestamp': '2025-09-10 02:25:19.429195', 'step': 4489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:25:19.459709', 'step': 4489, 'epoch': 2} {'type': 'loss', 'content': 0.0013656103983521461, 'timestamp': '2025-09-10 02:25:19.472263', 'step': 4490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:19.502937', 'step': 4490, 'epoch': 2} {'type': 'loss', 'content': 0.0037658896762877703, 'timestamp': '2025-09-10 02:25:19.509624', 'step': 4491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:19.541567', 'step': 4491, 'epoch': 2} {'type': 'loss', 'content': 0.002117105294018984, 'timestamp': '2025-09-10 02:25:19.574046', 'step': 4492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:25:19.609978', 'step': 4492, 'epoch': 2} {'type': 'loss', 'content': 0.0034039022866636515, 'timestamp': '2025-09-10 02:25:19.625185', 'step': 4493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:25:19.660066', 'step': 4493, 'epoch': 2} {'type': 'loss', 'content': 0.007604501210153103, 'timestamp': '2025-09-10 02:25:19.673916', 'step': 4494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:19.706338', 'step': 4494, 'epoch': 2} {'type': 'loss', 'content': 0.0011244564084336162, 'timestamp': '2025-09-10 02:25:19.713059', 'step': 4495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:19.746550', 'step': 4495, 'epoch': 2} {'type': 'loss', 'content': 0.006800326984375715, 'timestamp': '2025-09-10 02:25:19.780819', 'step': 4496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:19.813417', 'step': 4496, 'epoch': 2} {'type': 'loss', 'content': 0.01871911995112896, 'timestamp': '2025-09-10 02:25:19.818464', 'step': 4497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:19.848580', 'step': 4497, 'epoch': 2} {'type': 'loss', 'content': 0.0017440064111724496, 'timestamp': '2025-09-10 02:25:19.851210', 'step': 4498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:19.881445', 'step': 4498, 'epoch': 2} {'type': 'loss', 'content': 0.0011718474561348557, 'timestamp': '2025-09-10 02:25:19.891679', 'step': 4499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:19.922466', 'step': 4499, 'epoch': 2} {'type': 'loss', 'content': 0.0032657107803970575, 'timestamp': '2025-09-10 02:25:19.953441', 'step': 4500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 4500', 'timestamp': '2025-09-10 02:25:24.557607', 'step': 4500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:24.600123', 'step': 4500, 'epoch': 2} {'type': 'loss', 'content': 0.0034066697116941214, 'timestamp': '2025-09-10 02:25:24.610227', 'step': 4501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:24.646491', 'step': 4501, 'epoch': 2} {'type': 'loss', 'content': 0.002542842412367463, 'timestamp': '2025-09-10 02:25:24.653568', 'step': 4502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:25:24.685272', 'step': 4502, 'epoch': 2} {'type': 'loss', 'content': 0.002150564454495907, 'timestamp': '2025-09-10 02:25:24.697290', 'step': 4503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:24.729074', 'step': 4503, 'epoch': 2} {'type': 'loss', 'content': 0.001930785016156733, 'timestamp': '2025-09-10 02:25:24.757013', 'step': 4504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:24.789288', 'step': 4504, 'epoch': 2} {'type': 'loss', 'content': 0.003784495871514082, 'timestamp': '2025-09-10 02:25:24.793966', 'step': 4505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:24.826430', 'step': 4505, 'epoch': 2} {'type': 'loss', 'content': 0.010695603676140308, 'timestamp': '2025-09-10 02:25:24.833409', 'step': 4506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:25:24.868356', 'step': 4506, 'epoch': 2} {'type': 'loss', 'content': 0.0040979208424687386, 'timestamp': '2025-09-10 02:25:24.870690', 'step': 4507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:25:24.905472', 'step': 4507, 'epoch': 2} {'type': 'loss', 'content': 0.002046718029305339, 'timestamp': '2025-09-10 02:25:24.940320', 'step': 4508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:24.972380', 'step': 4508, 'epoch': 2} {'type': 'loss', 'content': 0.0030817301012575626, 'timestamp': '2025-09-10 02:25:24.976751', 'step': 4509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:25:25.009031', 'step': 4509, 'epoch': 2} {'type': 'loss', 'content': 0.0035417492035776377, 'timestamp': '2025-09-10 02:25:25.021505', 'step': 4510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:25.052846', 'step': 4510, 'epoch': 2} {'type': 'loss', 'content': 0.0019691623747348785, 'timestamp': '2025-09-10 02:25:25.060224', 'step': 4511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:25.092805', 'step': 4511, 'epoch': 2} {'type': 'loss', 'content': 0.0045999689027667046, 'timestamp': '2025-09-10 02:25:25.120632', 'step': 4512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:25.152028', 'step': 4512, 'epoch': 2} {'type': 'loss', 'content': 0.0011519378749653697, 'timestamp': '2025-09-10 02:25:25.156253', 'step': 4513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:25:25.190703', 'step': 4513, 'epoch': 2} {'type': 'loss', 'content': 0.003534820629283786, 'timestamp': '2025-09-10 02:25:25.204538', 'step': 4514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:25.235373', 'step': 4514, 'epoch': 2} {'type': 'loss', 'content': 0.0012388339964672923, 'timestamp': '2025-09-10 02:25:25.242299', 'step': 4515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:25.273546', 'step': 4515, 'epoch': 2} {'type': 'loss', 'content': 0.004160807467997074, 'timestamp': '2025-09-10 02:25:25.301250', 'step': 4516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:25.333171', 'step': 4516, 'epoch': 2} {'type': 'loss', 'content': 0.022341666743159294, 'timestamp': '2025-09-10 02:25:25.337743', 'step': 4517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:25.370232', 'step': 4517, 'epoch': 2} {'type': 'loss', 'content': 0.0011338494950905442, 'timestamp': '2025-09-10 02:25:25.381798', 'step': 4518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:25.414671', 'step': 4518, 'epoch': 2} {'type': 'loss', 'content': 0.015123574994504452, 'timestamp': '2025-09-10 02:25:25.421016', 'step': 4519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:25.452284', 'step': 4519, 'epoch': 2} {'type': 'loss', 'content': 0.0006013158708810806, 'timestamp': '2025-09-10 02:25:25.477041', 'step': 4520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:25:25.510533', 'step': 4520, 'epoch': 2} {'type': 'loss', 'content': 0.004607627633959055, 'timestamp': '2025-09-10 02:25:25.523661', 'step': 4521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:25.555730', 'step': 4521, 'epoch': 2} {'type': 'loss', 'content': 0.0005228667287155986, 'timestamp': '2025-09-10 02:25:25.563370', 'step': 4522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:25.594818', 'step': 4522, 'epoch': 2} {'type': 'loss', 'content': 0.0018962175818160176, 'timestamp': '2025-09-10 02:25:25.604537', 'step': 4523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:25.635292', 'step': 4523, 'epoch': 2} {'type': 'loss', 'content': 0.0010046407114714384, 'timestamp': '2025-09-10 02:25:25.666066', 'step': 4524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:25.698133', 'step': 4524, 'epoch': 2} {'type': 'loss', 'content': 0.0004124719125684351, 'timestamp': '2025-09-10 02:25:25.705611', 'step': 4525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:25.737235', 'step': 4525, 'epoch': 2} {'type': 'loss', 'content': 0.00014472127077169716, 'timestamp': '2025-09-10 02:25:25.741466', 'step': 4526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:25.772457', 'step': 4526, 'epoch': 2} {'type': 'loss', 'content': 0.0033601843751966953, 'timestamp': '2025-09-10 02:25:25.778985', 'step': 4527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:25.811525', 'step': 4527, 'epoch': 2} {'type': 'loss', 'content': 0.00247010076418519, 'timestamp': '2025-09-10 02:25:25.842144', 'step': 4528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:25.873629', 'step': 4528, 'epoch': 2} {'type': 'loss', 'content': 0.03199951723217964, 'timestamp': '2025-09-10 02:25:25.877737', 'step': 4529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:25.911048', 'step': 4529, 'epoch': 2} {'type': 'loss', 'content': 0.004257894121110439, 'timestamp': '2025-09-10 02:25:25.918120', 'step': 4530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:25.949885', 'step': 4530, 'epoch': 2} {'type': 'loss', 'content': 0.0010970250004902482, 'timestamp': '2025-09-10 02:25:25.956494', 'step': 4531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:25.988518', 'step': 4531, 'epoch': 2} {'type': 'loss', 'content': 0.0010734976967796683, 'timestamp': '2025-09-10 02:25:26.019911', 'step': 4532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:25:26.051703', 'step': 4532, 'epoch': 2} {'type': 'loss', 'content': 0.0022549789864569902, 'timestamp': '2025-09-10 02:25:26.061138', 'step': 4533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:26.092505', 'step': 4533, 'epoch': 2} {'type': 'loss', 'content': 0.005263431929051876, 'timestamp': '2025-09-10 02:25:26.095987', 'step': 4534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:26.127450', 'step': 4534, 'epoch': 2} {'type': 'loss', 'content': 0.00026051278109662235, 'timestamp': '2025-09-10 02:25:26.130043', 'step': 4535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:26.168166', 'step': 4535, 'epoch': 2} {'type': 'loss', 'content': 0.0022305804304778576, 'timestamp': '2025-09-10 02:25:26.191905', 'step': 4536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:26.224731', 'step': 4536, 'epoch': 2} {'type': 'loss', 'content': 0.0032375783193856478, 'timestamp': '2025-09-10 02:25:26.237353', 'step': 4537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:26.268987', 'step': 4537, 'epoch': 2} {'type': 'loss', 'content': 0.0010004842188209295, 'timestamp': '2025-09-10 02:25:26.275711', 'step': 4538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:26.308250', 'step': 4538, 'epoch': 2} {'type': 'loss', 'content': 0.000708259001839906, 'timestamp': '2025-09-10 02:25:26.315660', 'step': 4539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:26.347929', 'step': 4539, 'epoch': 2} {'type': 'loss', 'content': 0.020569520071148872, 'timestamp': '2025-09-10 02:25:26.372621', 'step': 4540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:26.405003', 'step': 4540, 'epoch': 2} {'type': 'loss', 'content': 0.009725110605359077, 'timestamp': '2025-09-10 02:25:26.412294', 'step': 4541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:26.443431', 'step': 4541, 'epoch': 2} {'type': 'loss', 'content': 0.003039458068087697, 'timestamp': '2025-09-10 02:25:26.447287', 'step': 4542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:26.478145', 'step': 4542, 'epoch': 2} {'type': 'loss', 'content': 0.0049249157309532166, 'timestamp': '2025-09-10 02:25:26.482075', 'step': 4543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:25:26.517540', 'step': 4543, 'epoch': 2} {'type': 'loss', 'content': 0.0006834098021499813, 'timestamp': '2025-09-10 02:25:26.552245', 'step': 4544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:26.583654', 'step': 4544, 'epoch': 2} {'type': 'loss', 'content': 0.00034481266629882157, 'timestamp': '2025-09-10 02:25:26.587683', 'step': 4545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:26.629070', 'step': 4545, 'epoch': 2} {'type': 'loss', 'content': 0.006935402750968933, 'timestamp': '2025-09-10 02:25:26.633268', 'step': 4546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:26.664858', 'step': 4546, 'epoch': 2} {'type': 'loss', 'content': 0.006434209179133177, 'timestamp': '2025-09-10 02:25:26.672410', 'step': 4547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:25:26.713922', 'step': 4547, 'epoch': 2} {'type': 'loss', 'content': 0.01216146256774664, 'timestamp': '2025-09-10 02:25:26.751858', 'step': 4548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:26.783543', 'step': 4548, 'epoch': 2} {'type': 'loss', 'content': 0.008372723124921322, 'timestamp': '2025-09-10 02:25:26.786885', 'step': 4549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:26.819839', 'step': 4549, 'epoch': 2} {'type': 'loss', 'content': 0.00022385262127500027, 'timestamp': '2025-09-10 02:25:26.826651', 'step': 4550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:25:26.858276', 'step': 4550, 'epoch': 2} {'type': 'loss', 'content': 0.007002050522714853, 'timestamp': '2025-09-10 02:25:26.870536', 'step': 4551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:26.903103', 'step': 4551, 'epoch': 2} {'type': 'loss', 'content': 0.0005396933993324637, 'timestamp': '2025-09-10 02:25:26.931106', 'step': 4552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:26.962944', 'step': 4552, 'epoch': 2} {'type': 'loss', 'content': 0.0024629905819892883, 'timestamp': '2025-09-10 02:25:26.970969', 'step': 4553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:27.002522', 'step': 4553, 'epoch': 2} {'type': 'loss', 'content': 0.00016223655256908387, 'timestamp': '2025-09-10 02:25:27.006138', 'step': 4554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:27.037871', 'step': 4554, 'epoch': 2} {'type': 'loss', 'content': 0.0015956457937136292, 'timestamp': '2025-09-10 02:25:27.040497', 'step': 4555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:25:27.071104', 'step': 4555, 'epoch': 2} {'type': 'loss', 'content': 0.0005312658613547683, 'timestamp': '2025-09-10 02:25:27.095029', 'step': 4556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:27.127004', 'step': 4556, 'epoch': 2} {'type': 'loss', 'content': 0.002805754542350769, 'timestamp': '2025-09-10 02:25:27.129456', 'step': 4557, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:25:37.248941', 'step': 4557, 'epoch': 2} {'type': 'pplx', 'content': 22196068.675676793, 'timestamp': '2025-09-10 02:25:37.251722', 'step': 4557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:37.281897', 'step': 4557, 'epoch': 2} {'type': 'loss', 'content': 0.0010733563685789704, 'timestamp': '2025-09-10 02:25:37.285638', 'step': 4558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:37.317115', 'step': 4558, 'epoch': 2} {'type': 'loss', 'content': 5.339493145584129e-05, 'timestamp': '2025-09-10 02:25:37.324842', 'step': 4559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:37.357123', 'step': 4559, 'epoch': 2} {'type': 'loss', 'content': 0.012812472879886627, 'timestamp': '2025-09-10 02:25:37.384863', 'step': 4560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:37.415764', 'step': 4560, 'epoch': 2} {'type': 'loss', 'content': 0.008561398833990097, 'timestamp': '2025-09-10 02:25:37.418114', 'step': 4561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:37.449223', 'step': 4561, 'epoch': 2} {'type': 'loss', 'content': 0.00012836763926316053, 'timestamp': '2025-09-10 02:25:37.456292', 'step': 4562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:37.486863', 'step': 4562, 'epoch': 2} {'type': 'loss', 'content': 0.001862238277681172, 'timestamp': '2025-09-10 02:25:37.499099', 'step': 4563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:37.529385', 'step': 4563, 'epoch': 2} {'type': 'loss', 'content': 0.0009596589952707291, 'timestamp': '2025-09-10 02:25:37.557360', 'step': 4564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:37.588280', 'step': 4564, 'epoch': 2} {'type': 'loss', 'content': 0.0013999083312228322, 'timestamp': '2025-09-10 02:25:37.596221', 'step': 4565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:37.627323', 'step': 4565, 'epoch': 2} {'type': 'loss', 'content': 0.0009914558613672853, 'timestamp': '2025-09-10 02:25:37.634287', 'step': 4566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:37.664921', 'step': 4566, 'epoch': 2} {'type': 'loss', 'content': 0.002980300458148122, 'timestamp': '2025-09-10 02:25:37.672667', 'step': 4567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:37.703443', 'step': 4567, 'epoch': 2} {'type': 'loss', 'content': 0.00020019218209199607, 'timestamp': '2025-09-10 02:25:37.732258', 'step': 4568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:37.763055', 'step': 4568, 'epoch': 2} {'type': 'loss', 'content': 0.0008754459558986127, 'timestamp': '2025-09-10 02:25:37.767499', 'step': 4569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:37.797578', 'step': 4569, 'epoch': 2} {'type': 'loss', 'content': 0.0008850363665260375, 'timestamp': '2025-09-10 02:25:37.804781', 'step': 4570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:37.835457', 'step': 4570, 'epoch': 2} {'type': 'loss', 'content': 0.0006145837833173573, 'timestamp': '2025-09-10 02:25:37.842706', 'step': 4571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:37.873094', 'step': 4571, 'epoch': 2} {'type': 'loss', 'content': 0.0006157992756925523, 'timestamp': '2025-09-10 02:25:37.904941', 'step': 4572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:37.935999', 'step': 4572, 'epoch': 2} {'type': 'loss', 'content': 0.00951096136122942, 'timestamp': '2025-09-10 02:25:37.940852', 'step': 4573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:37.972196', 'step': 4573, 'epoch': 2} {'type': 'loss', 'content': 0.034846000373363495, 'timestamp': '2025-09-10 02:25:37.979654', 'step': 4574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:38.011724', 'step': 4574, 'epoch': 2} {'type': 'loss', 'content': 0.0007103482494130731, 'timestamp': '2025-09-10 02:25:38.015941', 'step': 4575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:38.047396', 'step': 4575, 'epoch': 2} {'type': 'loss', 'content': 0.0004654258373193443, 'timestamp': '2025-09-10 02:25:38.075283', 'step': 4576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:38.109652', 'step': 4576, 'epoch': 2} {'type': 'loss', 'content': 0.002614832716062665, 'timestamp': '2025-09-10 02:25:38.118519', 'step': 4577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:38.150260', 'step': 4577, 'epoch': 2} {'type': 'loss', 'content': 0.017886726185679436, 'timestamp': '2025-09-10 02:25:38.157172', 'step': 4578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:38.188687', 'step': 4578, 'epoch': 2} {'type': 'loss', 'content': 0.00023473672627005726, 'timestamp': '2025-09-10 02:25:38.195723', 'step': 4579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:38.227708', 'step': 4579, 'epoch': 2} {'type': 'loss', 'content': 0.0004447103419806808, 'timestamp': '2025-09-10 02:25:38.255265', 'step': 4580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:38.288925', 'step': 4580, 'epoch': 2} {'type': 'loss', 'content': 0.0046869367361068726, 'timestamp': '2025-09-10 02:25:38.295757', 'step': 4581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:38.327961', 'step': 4581, 'epoch': 2} {'type': 'loss', 'content': 0.0004977317294105887, 'timestamp': '2025-09-10 02:25:38.337638', 'step': 4582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:38.370544', 'step': 4582, 'epoch': 2} {'type': 'loss', 'content': 0.0006395676755346358, 'timestamp': '2025-09-10 02:25:38.382421', 'step': 4583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:38.413797', 'step': 4583, 'epoch': 2} {'type': 'loss', 'content': 0.0017181943403556943, 'timestamp': '2025-09-10 02:25:38.442673', 'step': 4584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:38.474975', 'step': 4584, 'epoch': 2} {'type': 'loss', 'content': 0.00017127768660429865, 'timestamp': '2025-09-10 02:25:38.479489', 'step': 4585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:25:38.510902', 'step': 4585, 'epoch': 2} {'type': 'loss', 'content': 0.0008789977291598916, 'timestamp': '2025-09-10 02:25:38.514803', 'step': 4586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:38.546954', 'step': 4586, 'epoch': 2} {'type': 'loss', 'content': 0.000202530252863653, 'timestamp': '2025-09-10 02:25:38.553713', 'step': 4587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:25:38.595580', 'step': 4587, 'epoch': 2} {'type': 'loss', 'content': 0.02567419223487377, 'timestamp': '2025-09-10 02:25:38.633726', 'step': 4588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:38.665670', 'step': 4588, 'epoch': 2} {'type': 'loss', 'content': 0.0034661719109863043, 'timestamp': '2025-09-10 02:25:38.673159', 'step': 4589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:38.705050', 'step': 4589, 'epoch': 2} {'type': 'loss', 'content': 0.0012347496813163161, 'timestamp': '2025-09-10 02:25:38.712603', 'step': 4590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:38.745491', 'step': 4590, 'epoch': 2} {'type': 'loss', 'content': 0.0001653311337577179, 'timestamp': '2025-09-10 02:25:38.752873', 'step': 4591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:38.784280', 'step': 4591, 'epoch': 2} {'type': 'loss', 'content': 0.0006502936012111604, 'timestamp': '2025-09-10 02:25:38.809196', 'step': 4592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:25:38.846360', 'step': 4592, 'epoch': 2} {'type': 'loss', 'content': 0.00027454280643723905, 'timestamp': '2025-09-10 02:25:38.861692', 'step': 4593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:38.893021', 'step': 4593, 'epoch': 2} {'type': 'loss', 'content': 0.0001545843406347558, 'timestamp': '2025-09-10 02:25:38.897115', 'step': 4594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:38.927715', 'step': 4594, 'epoch': 2} {'type': 'loss', 'content': 0.05455930903553963, 'timestamp': '2025-09-10 02:25:38.932332', 'step': 4595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:38.964182', 'step': 4595, 'epoch': 2} {'type': 'loss', 'content': 0.00358793162740767, 'timestamp': '2025-09-10 02:25:38.992494', 'step': 4596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:39.024115', 'step': 4596, 'epoch': 2} {'type': 'loss', 'content': 0.0006846991600468755, 'timestamp': '2025-09-10 02:25:39.032247', 'step': 4597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:39.065109', 'step': 4597, 'epoch': 2} {'type': 'loss', 'content': 0.000762683164793998, 'timestamp': '2025-09-10 02:25:39.072321', 'step': 4598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:39.105738', 'step': 4598, 'epoch': 2} {'type': 'loss', 'content': 0.0005580178694799542, 'timestamp': '2025-09-10 02:25:39.119148', 'step': 4599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:25:39.149868', 'step': 4599, 'epoch': 2} {'type': 'loss', 'content': 0.04828514903783798, 'timestamp': '2025-09-10 02:25:39.174906', 'step': 4600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:39.212885', 'step': 4600, 'epoch': 2} {'type': 'loss', 'content': 0.0019080432830378413, 'timestamp': '2025-09-10 02:25:39.217720', 'step': 4601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:39.249614', 'step': 4601, 'epoch': 2} {'type': 'loss', 'content': 0.00021091777307447046, 'timestamp': '2025-09-10 02:25:39.256791', 'step': 4602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 14712978242368}, 'timestamp': '2025-09-10 02:25:39.300909', 'step': 4602, 'epoch': 2} {'type': 'loss', 'content': 0.0009871380170807242, 'timestamp': '2025-09-10 02:25:39.318473', 'step': 4603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:39.350082', 'step': 4603, 'epoch': 2} {'type': 'loss', 'content': 0.0004607336886692792, 'timestamp': '2025-09-10 02:25:39.380362', 'step': 4604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:39.411906', 'step': 4604, 'epoch': 2} {'type': 'loss', 'content': 0.0006659630453214049, 'timestamp': '2025-09-10 02:25:39.419587', 'step': 4605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:39.449968', 'step': 4605, 'epoch': 2} {'type': 'loss', 'content': 0.001328265992924571, 'timestamp': '2025-09-10 02:25:39.456704', 'step': 4606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:25:39.487495', 'step': 4606, 'epoch': 2} {'type': 'loss', 'content': 0.00017410985310561955, 'timestamp': '2025-09-10 02:25:39.490210', 'step': 4607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:39.521017', 'step': 4607, 'epoch': 2} {'type': 'loss', 'content': 0.0004943975363858044, 'timestamp': '2025-09-10 02:25:39.549176', 'step': 4608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:25:39.587044', 'step': 4608, 'epoch': 2} {'type': 'loss', 'content': 0.005744884721934795, 'timestamp': '2025-09-10 02:25:39.602492', 'step': 4609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:39.634627', 'step': 4609, 'epoch': 2} {'type': 'loss', 'content': 0.0007691961363889277, 'timestamp': '2025-09-10 02:25:39.645597', 'step': 4610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:39.676945', 'step': 4610, 'epoch': 2} {'type': 'loss', 'content': 0.00013593518815468997, 'timestamp': '2025-09-10 02:25:39.684017', 'step': 4611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:39.715445', 'step': 4611, 'epoch': 2} {'type': 'loss', 'content': 0.0014134369557723403, 'timestamp': '2025-09-10 02:25:39.743812', 'step': 4612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:39.775669', 'step': 4612, 'epoch': 2} {'type': 'loss', 'content': 0.0025131232105195522, 'timestamp': '2025-09-10 02:25:39.781144', 'step': 4613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:25:39.815817', 'step': 4613, 'epoch': 2} {'type': 'loss', 'content': 0.0002813456521835178, 'timestamp': '2025-09-10 02:25:39.829621', 'step': 4614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:39.860677', 'step': 4614, 'epoch': 2} {'type': 'loss', 'content': 0.003985443152487278, 'timestamp': '2025-09-10 02:25:39.868027', 'step': 4615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:39.899801', 'step': 4615, 'epoch': 2} {'type': 'loss', 'content': 6.440089055104181e-05, 'timestamp': '2025-09-10 02:25:39.928044', 'step': 4616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:39.958592', 'step': 4616, 'epoch': 2} {'type': 'loss', 'content': 0.004450418520718813, 'timestamp': '2025-09-10 02:25:39.963542', 'step': 4617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:39.994245', 'step': 4617, 'epoch': 2} {'type': 'loss', 'content': 0.0024432761128991842, 'timestamp': '2025-09-10 02:25:40.004305', 'step': 4618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:40.034812', 'step': 4618, 'epoch': 2} {'type': 'loss', 'content': 0.008134338073432446, 'timestamp': '2025-09-10 02:25:40.038920', 'step': 4619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:25:40.076477', 'step': 4619, 'epoch': 2} {'type': 'loss', 'content': 0.0038712576497346163, 'timestamp': '2025-09-10 02:25:40.113034', 'step': 4620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:40.143839', 'step': 4620, 'epoch': 2} {'type': 'loss', 'content': 0.00024131852842401713, 'timestamp': '2025-09-10 02:25:40.148586', 'step': 4621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:40.179389', 'step': 4621, 'epoch': 2} {'type': 'loss', 'content': 0.0002276496816193685, 'timestamp': '2025-09-10 02:25:40.190457', 'step': 4622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:40.227228', 'step': 4622, 'epoch': 2} {'type': 'loss', 'content': 0.00045795520418323576, 'timestamp': '2025-09-10 02:25:40.240617', 'step': 4623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:40.272382', 'step': 4623, 'epoch': 2} {'type': 'loss', 'content': 0.0020052941981703043, 'timestamp': '2025-09-10 02:25:40.305646', 'step': 4624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:40.336456', 'step': 4624, 'epoch': 2} {'type': 'loss', 'content': 0.0003010353248100728, 'timestamp': '2025-09-10 02:25:40.341626', 'step': 4625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:40.372635', 'step': 4625, 'epoch': 2} {'type': 'loss', 'content': 0.0003616653848439455, 'timestamp': '2025-09-10 02:25:40.384970', 'step': 4626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:40.415819', 'step': 4626, 'epoch': 2} {'type': 'loss', 'content': 0.0009788486640900373, 'timestamp': '2025-09-10 02:25:40.422896', 'step': 4627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:40.456481', 'step': 4627, 'epoch': 2} {'type': 'loss', 'content': 0.0018540980527177453, 'timestamp': '2025-09-10 02:25:40.490768', 'step': 4628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:25:40.522011', 'step': 4628, 'epoch': 2} {'type': 'loss', 'content': 0.00019449996761977673, 'timestamp': '2025-09-10 02:25:40.532556', 'step': 4629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:40.563716', 'step': 4629, 'epoch': 2} {'type': 'loss', 'content': 0.001505575724877417, 'timestamp': '2025-09-10 02:25:40.570742', 'step': 4630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:25:40.602398', 'step': 4630, 'epoch': 2} {'type': 'loss', 'content': 0.00010367255163146183, 'timestamp': '2025-09-10 02:25:40.604663', 'step': 4631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:40.634796', 'step': 4631, 'epoch': 2} {'type': 'loss', 'content': 0.00021751046006102115, 'timestamp': '2025-09-10 02:25:40.658513', 'step': 4632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:40.689780', 'step': 4632, 'epoch': 2} {'type': 'loss', 'content': 0.0005528530455194414, 'timestamp': '2025-09-10 02:25:40.694627', 'step': 4633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:40.725402', 'step': 4633, 'epoch': 2} {'type': 'loss', 'content': 0.0006717692012898624, 'timestamp': '2025-09-10 02:25:40.732500', 'step': 4634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:40.763952', 'step': 4634, 'epoch': 2} {'type': 'loss', 'content': 0.0003762389242183417, 'timestamp': '2025-09-10 02:25:40.771598', 'step': 4635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:40.803947', 'step': 4635, 'epoch': 2} {'type': 'loss', 'content': 0.0006608268013224006, 'timestamp': '2025-09-10 02:25:40.832461', 'step': 4636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:40.864000', 'step': 4636, 'epoch': 2} {'type': 'loss', 'content': 8.623411849839613e-05, 'timestamp': '2025-09-10 02:25:40.871941', 'step': 4637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:40.903954', 'step': 4637, 'epoch': 2} {'type': 'loss', 'content': 0.0006122788763605058, 'timestamp': '2025-09-10 02:25:40.910802', 'step': 4638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:25:40.945948', 'step': 4638, 'epoch': 2} {'type': 'loss', 'content': 0.002174076158553362, 'timestamp': '2025-09-10 02:25:40.959955', 'step': 4639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:25:40.995398', 'step': 4639, 'epoch': 2} {'type': 'loss', 'content': 0.0002304811787325889, 'timestamp': '2025-09-10 02:25:41.030016', 'step': 4640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:41.060128', 'step': 4640, 'epoch': 2} {'type': 'loss', 'content': 7.722365262452513e-05, 'timestamp': '2025-09-10 02:25:41.062407', 'step': 4641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:25:41.096808', 'step': 4641, 'epoch': 2} {'type': 'loss', 'content': 0.00041160904220305383, 'timestamp': '2025-09-10 02:25:41.110676', 'step': 4642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:41.141746', 'step': 4642, 'epoch': 2} {'type': 'loss', 'content': 0.0018459666753187776, 'timestamp': '2025-09-10 02:25:41.148706', 'step': 4643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:41.180226', 'step': 4643, 'epoch': 2} {'type': 'loss', 'content': 0.0011864164844155312, 'timestamp': '2025-09-10 02:25:41.208859', 'step': 4644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:41.239694', 'step': 4644, 'epoch': 2} {'type': 'loss', 'content': 5.6243621656904e-05, 'timestamp': '2025-09-10 02:25:41.241544', 'step': 4645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:41.272194', 'step': 4645, 'epoch': 2} {'type': 'loss', 'content': 0.054455097764730453, 'timestamp': '2025-09-10 02:25:41.279208', 'step': 4646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:41.309894', 'step': 4646, 'epoch': 2} {'type': 'loss', 'content': 0.0007867555250413716, 'timestamp': '2025-09-10 02:25:41.314022', 'step': 4647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:25:41.346859', 'step': 4647, 'epoch': 2} {'type': 'loss', 'content': 0.00012387036986183375, 'timestamp': '2025-09-10 02:25:41.380311', 'step': 4648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:41.413604', 'step': 4648, 'epoch': 2} {'type': 'loss', 'content': 0.00019573597819544375, 'timestamp': '2025-09-10 02:25:41.418594', 'step': 4649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:41.450652', 'step': 4649, 'epoch': 2} {'type': 'loss', 'content': 7.041559729259461e-05, 'timestamp': '2025-09-10 02:25:41.458149', 'step': 4650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:41.490198', 'step': 4650, 'epoch': 2} {'type': 'loss', 'content': 0.02323429472744465, 'timestamp': '2025-09-10 02:25:41.500100', 'step': 4651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:41.531296', 'step': 4651, 'epoch': 2} {'type': 'loss', 'content': 0.0013165498385205865, 'timestamp': '2025-09-10 02:25:41.562358', 'step': 4652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:41.593334', 'step': 4652, 'epoch': 2} {'type': 'loss', 'content': 0.0002063662832370028, 'timestamp': '2025-09-10 02:25:41.595699', 'step': 4653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:25:41.625894', 'step': 4653, 'epoch': 2} {'type': 'loss', 'content': 0.011784272268414497, 'timestamp': '2025-09-10 02:25:41.628252', 'step': 4654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:41.660002', 'step': 4654, 'epoch': 2} {'type': 'loss', 'content': 0.0023810744751244783, 'timestamp': '2025-09-10 02:25:41.667569', 'step': 4655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:41.697803', 'step': 4655, 'epoch': 2} {'type': 'loss', 'content': 0.0012222749646753073, 'timestamp': '2025-09-10 02:25:41.723222', 'step': 4656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:41.753995', 'step': 4656, 'epoch': 2} {'type': 'loss', 'content': 0.0006310855969786644, 'timestamp': '2025-09-10 02:25:41.762630', 'step': 4657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:41.793181', 'step': 4657, 'epoch': 2} {'type': 'loss', 'content': 6.163497891975567e-05, 'timestamp': '2025-09-10 02:25:41.797539', 'step': 4658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:41.829355', 'step': 4658, 'epoch': 2} {'type': 'loss', 'content': 0.00010095408651977777, 'timestamp': '2025-09-10 02:25:41.833453', 'step': 4659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:41.865602', 'step': 4659, 'epoch': 2} {'type': 'loss', 'content': 0.00024290040892083198, 'timestamp': '2025-09-10 02:25:41.897535', 'step': 4660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:41.929454', 'step': 4660, 'epoch': 2} {'type': 'loss', 'content': 0.0009246188565157354, 'timestamp': '2025-09-10 02:25:41.942119', 'step': 4661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:41.972950', 'step': 4661, 'epoch': 2} {'type': 'loss', 'content': 0.0011847690911963582, 'timestamp': '2025-09-10 02:25:41.980351', 'step': 4662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:25:42.019971', 'step': 4662, 'epoch': 2} {'type': 'loss', 'content': 0.0035353994462639093, 'timestamp': '2025-09-10 02:25:42.035617', 'step': 4663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:42.066434', 'step': 4663, 'epoch': 2} {'type': 'loss', 'content': 0.0010591925820335746, 'timestamp': '2025-09-10 02:25:42.095085', 'step': 4664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:25:42.134536', 'step': 4664, 'epoch': 2} {'type': 'loss', 'content': 0.00923153292387724, 'timestamp': '2025-09-10 02:25:42.151529', 'step': 4665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:42.182270', 'step': 4665, 'epoch': 2} {'type': 'loss', 'content': 0.000438479648437351, 'timestamp': '2025-09-10 02:25:42.189395', 'step': 4666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:42.226759', 'step': 4666, 'epoch': 2} {'type': 'loss', 'content': 0.004933382850140333, 'timestamp': '2025-09-10 02:25:42.231094', 'step': 4667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:42.262207', 'step': 4667, 'epoch': 2} {'type': 'loss', 'content': 0.018754737451672554, 'timestamp': '2025-09-10 02:25:42.287551', 'step': 4668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:42.319647', 'step': 4668, 'epoch': 2} {'type': 'loss', 'content': 0.0001720808504614979, 'timestamp': '2025-09-10 02:25:42.324427', 'step': 4669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:42.354995', 'step': 4669, 'epoch': 2} {'type': 'loss', 'content': 0.0011062477715313435, 'timestamp': '2025-09-10 02:25:42.362087', 'step': 4670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:42.392755', 'step': 4670, 'epoch': 2} {'type': 'loss', 'content': 0.00019575886835809797, 'timestamp': '2025-09-10 02:25:42.403589', 'step': 4671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:42.434530', 'step': 4671, 'epoch': 2} {'type': 'loss', 'content': 0.005415527615696192, 'timestamp': '2025-09-10 02:25:42.465694', 'step': 4672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:42.497197', 'step': 4672, 'epoch': 2} {'type': 'loss', 'content': 0.0010951546719297767, 'timestamp': '2025-09-10 02:25:42.501696', 'step': 4673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:25:42.532796', 'step': 4673, 'epoch': 2} {'type': 'loss', 'content': 0.002216489752754569, 'timestamp': '2025-09-10 02:25:42.535146', 'step': 4674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:42.566942', 'step': 4674, 'epoch': 2} {'type': 'loss', 'content': 0.0029746759682893753, 'timestamp': '2025-09-10 02:25:42.571135', 'step': 4675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:25:42.603076', 'step': 4675, 'epoch': 2} {'type': 'loss', 'content': 0.0008459574310109019, 'timestamp': '2025-09-10 02:25:42.636460', 'step': 4676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:42.667674', 'step': 4676, 'epoch': 2} {'type': 'loss', 'content': 0.005179825238883495, 'timestamp': '2025-09-10 02:25:42.669969', 'step': 4677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:42.701536', 'step': 4677, 'epoch': 2} {'type': 'loss', 'content': 0.020527558401226997, 'timestamp': '2025-09-10 02:25:42.708350', 'step': 4678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:25:42.743444', 'step': 4678, 'epoch': 2} {'type': 'loss', 'content': 0.0006391909555532038, 'timestamp': '2025-09-10 02:25:42.757207', 'step': 4679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:25:42.791612', 'step': 4679, 'epoch': 2} {'type': 'loss', 'content': 0.00017153627413790673, 'timestamp': '2025-09-10 02:25:42.826189', 'step': 4680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:42.857263', 'step': 4680, 'epoch': 2} {'type': 'loss', 'content': 6.232234591152519e-05, 'timestamp': '2025-09-10 02:25:42.862050', 'step': 4681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:42.893169', 'step': 4681, 'epoch': 2} {'type': 'loss', 'content': 0.0010045451344922185, 'timestamp': '2025-09-10 02:25:42.904132', 'step': 4682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:42.934949', 'step': 4682, 'epoch': 2} {'type': 'loss', 'content': 0.0005457932711578906, 'timestamp': '2025-09-10 02:25:42.945212', 'step': 4683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:25:42.980176', 'step': 4683, 'epoch': 2} {'type': 'loss', 'content': 0.0005815924378111959, 'timestamp': '2025-09-10 02:25:43.015032', 'step': 4684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:25:43.054138', 'step': 4684, 'epoch': 2} {'type': 'loss', 'content': 0.005409900564700365, 'timestamp': '2025-09-10 02:25:43.070846', 'step': 4685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:43.102700', 'step': 4685, 'epoch': 2} {'type': 'loss', 'content': 0.00027891225181519985, 'timestamp': '2025-09-10 02:25:43.107089', 'step': 4686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:43.137992', 'step': 4686, 'epoch': 2} {'type': 'loss', 'content': 0.00238403445109725, 'timestamp': '2025-09-10 02:25:43.142416', 'step': 4687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:43.175928', 'step': 4687, 'epoch': 2} {'type': 'loss', 'content': 0.0003628423437476158, 'timestamp': '2025-09-10 02:25:43.210185', 'step': 4688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:43.240934', 'step': 4688, 'epoch': 2} {'type': 'loss', 'content': 0.005772776901721954, 'timestamp': '2025-09-10 02:25:43.248672', 'step': 4689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:43.279895', 'step': 4689, 'epoch': 2} {'type': 'loss', 'content': 5.581247023656033e-05, 'timestamp': '2025-09-10 02:25:43.287182', 'step': 4690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:43.318782', 'step': 4690, 'epoch': 2} {'type': 'loss', 'content': 0.001327107078395784, 'timestamp': '2025-09-10 02:25:43.331000', 'step': 4691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:43.361819', 'step': 4691, 'epoch': 2} {'type': 'loss', 'content': 0.039219219237565994, 'timestamp': '2025-09-10 02:25:43.386848', 'step': 4692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:43.420335', 'step': 4692, 'epoch': 2} {'type': 'loss', 'content': 8.427041029790416e-05, 'timestamp': '2025-09-10 02:25:43.425397', 'step': 4693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:43.456992', 'step': 4693, 'epoch': 2} {'type': 'loss', 'content': 0.00012090602103853598, 'timestamp': '2025-09-10 02:25:43.463920', 'step': 4694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:25:43.512243', 'step': 4694, 'epoch': 2} {'type': 'loss', 'content': 0.0006930717499926686, 'timestamp': '2025-09-10 02:25:43.529330', 'step': 4695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:43.566468', 'step': 4695, 'epoch': 2} {'type': 'loss', 'content': 0.0009244754328392446, 'timestamp': '2025-09-10 02:25:43.594568', 'step': 4696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:43.626716', 'step': 4696, 'epoch': 2} {'type': 'loss', 'content': 0.005284165497869253, 'timestamp': '2025-09-10 02:25:43.631652', 'step': 4697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:43.665532', 'step': 4697, 'epoch': 2} {'type': 'loss', 'content': 0.0008305375231429935, 'timestamp': '2025-09-10 02:25:43.678870', 'step': 4698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 13289167064320}, 'timestamp': '2025-09-10 02:25:43.719171', 'step': 4698, 'epoch': 2} {'type': 'loss', 'content': 0.0010617909720167518, 'timestamp': '2025-09-10 02:25:43.735466', 'step': 4699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:43.768609', 'step': 4699, 'epoch': 2} {'type': 'loss', 'content': 0.0003633351589087397, 'timestamp': '2025-09-10 02:25:43.797140', 'step': 4700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:43.827485', 'step': 4700, 'epoch': 2} {'type': 'loss', 'content': 0.001146303373388946, 'timestamp': '2025-09-10 02:25:43.833032', 'step': 4701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:43.864017', 'step': 4701, 'epoch': 2} {'type': 'loss', 'content': 0.0001229366025654599, 'timestamp': '2025-09-10 02:25:43.871844', 'step': 4702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:43.903234', 'step': 4702, 'epoch': 2} {'type': 'loss', 'content': 0.0005992205115035176, 'timestamp': '2025-09-10 02:25:43.910729', 'step': 4703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:43.942758', 'step': 4703, 'epoch': 2} {'type': 'loss', 'content': 0.0020961128175258636, 'timestamp': '2025-09-10 02:25:43.971239', 'step': 4704, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:25:54.264978', 'step': 4704, 'epoch': 2} {'type': 'pplx', 'content': 22252700.049582753, 'timestamp': '2025-09-10 02:25:54.267701', 'step': 4704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:54.299493', 'step': 4704, 'epoch': 2} {'type': 'loss', 'content': 0.0002747270918916911, 'timestamp': '2025-09-10 02:25:54.306002', 'step': 4705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:54.337028', 'step': 4705, 'epoch': 2} {'type': 'loss', 'content': 0.0001008029212243855, 'timestamp': '2025-09-10 02:25:54.341004', 'step': 4706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:25:54.374810', 'step': 4706, 'epoch': 2} {'type': 'loss', 'content': 0.0037881359457969666, 'timestamp': '2025-09-10 02:25:54.388470', 'step': 4707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:54.419023', 'step': 4707, 'epoch': 2} {'type': 'loss', 'content': 0.00021644457592628896, 'timestamp': '2025-09-10 02:25:54.447372', 'step': 4708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 19459015502528}, 'timestamp': '2025-09-10 02:25:54.500651', 'step': 4708, 'epoch': 2} {'type': 'loss', 'content': 0.0004264476883690804, 'timestamp': '2025-09-10 02:25:54.524182', 'step': 4709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:54.555932', 'step': 4709, 'epoch': 2} {'type': 'loss', 'content': 0.00112416862975806, 'timestamp': '2025-09-10 02:25:54.566184', 'step': 4710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:54.602033', 'step': 4710, 'epoch': 2} {'type': 'loss', 'content': 0.001086343778297305, 'timestamp': '2025-09-10 02:25:54.609220', 'step': 4711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:54.641173', 'step': 4711, 'epoch': 2} {'type': 'loss', 'content': 0.0024282841477543116, 'timestamp': '2025-09-10 02:25:54.666306', 'step': 4712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 15662185694400}, 'timestamp': '2025-09-10 02:25:54.711375', 'step': 4712, 'epoch': 2} {'type': 'loss', 'content': 0.00020581232092808932, 'timestamp': '2025-09-10 02:25:54.730406', 'step': 4713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [3, 224], 'flops': 4983601869792}, 'timestamp': '2025-09-10 02:25:54.764147', 'step': 4713, 'epoch': 2} {'type': 'loss', 'content': 0.00017851527081802487, 'timestamp': '2025-09-10 02:25:54.767859', 'step': 4714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:54.820878', 'step': 4714, 'epoch': 3} {'type': 'loss', 'content': 7.199771062005311e-05, 'timestamp': '2025-09-10 02:25:54.825650', 'step': 4715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:54.860167', 'step': 4715, 'epoch': 3} {'type': 'loss', 'content': 0.0005822144448757172, 'timestamp': '2025-09-10 02:25:54.888916', 'step': 4716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:54.925746', 'step': 4716, 'epoch': 3} {'type': 'loss', 'content': 0.0002994556853082031, 'timestamp': '2025-09-10 02:25:54.930312', 'step': 4717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:54.966869', 'step': 4717, 'epoch': 3} {'type': 'loss', 'content': 6.113481504144147e-05, 'timestamp': '2025-09-10 02:25:54.973429', 'step': 4718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:55.010593', 'step': 4718, 'epoch': 3} {'type': 'loss', 'content': 6.410970672732219e-05, 'timestamp': '2025-09-10 02:25:55.017589', 'step': 4719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:55.049280', 'step': 4719, 'epoch': 3} {'type': 'loss', 'content': 0.0018373571801930666, 'timestamp': '2025-09-10 02:25:55.076029', 'step': 4720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:55.108559', 'step': 4720, 'epoch': 3} {'type': 'loss', 'content': 0.005885870661586523, 'timestamp': '2025-09-10 02:25:55.112996', 'step': 4721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:55.143782', 'step': 4721, 'epoch': 3} {'type': 'loss', 'content': 0.000682682148180902, 'timestamp': '2025-09-10 02:25:55.151398', 'step': 4722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:55.187563', 'step': 4722, 'epoch': 3} {'type': 'loss', 'content': 0.0009401330025866628, 'timestamp': '2025-09-10 02:25:55.197939', 'step': 4723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:55.228686', 'step': 4723, 'epoch': 3} {'type': 'loss', 'content': 9.497522114543244e-05, 'timestamp': '2025-09-10 02:25:55.257550', 'step': 4724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:55.288450', 'step': 4724, 'epoch': 3} {'type': 'loss', 'content': 0.0003811214992310852, 'timestamp': '2025-09-10 02:25:55.293082', 'step': 4725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:55.322824', 'step': 4725, 'epoch': 3} {'type': 'loss', 'content': 0.003699273569509387, 'timestamp': '2025-09-10 02:25:55.329732', 'step': 4726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:55.360246', 'step': 4726, 'epoch': 3} {'type': 'loss', 'content': 0.0002697373856790364, 'timestamp': '2025-09-10 02:25:55.364754', 'step': 4727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:55.398993', 'step': 4727, 'epoch': 3} {'type': 'loss', 'content': 0.0001845106016844511, 'timestamp': '2025-09-10 02:25:55.433261', 'step': 4728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:55.465486', 'step': 4728, 'epoch': 3} {'type': 'loss', 'content': 0.0001434317382518202, 'timestamp': '2025-09-10 02:25:55.467733', 'step': 4729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:55.499235', 'step': 4729, 'epoch': 3} {'type': 'loss', 'content': 8.866209100233391e-05, 'timestamp': '2025-09-10 02:25:55.503347', 'step': 4730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:55.534093', 'step': 4730, 'epoch': 3} {'type': 'loss', 'content': 0.0001506084663560614, 'timestamp': '2025-09-10 02:25:55.546333', 'step': 4731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:55.576315', 'step': 4731, 'epoch': 3} {'type': 'loss', 'content': 0.0013919537886977196, 'timestamp': '2025-09-10 02:25:55.601714', 'step': 4732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:55.642361', 'step': 4732, 'epoch': 3} {'type': 'loss', 'content': 0.00031470126123167574, 'timestamp': '2025-09-10 02:25:55.649630', 'step': 4733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:55.692765', 'step': 4733, 'epoch': 3} {'type': 'loss', 'content': 0.0006087294896133244, 'timestamp': '2025-09-10 02:25:55.698471', 'step': 4734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:55.731178', 'step': 4734, 'epoch': 3} {'type': 'loss', 'content': 0.0029154361691325903, 'timestamp': '2025-09-10 02:25:55.741490', 'step': 4735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:55.773923', 'step': 4735, 'epoch': 3} {'type': 'loss', 'content': 0.0005959026166237891, 'timestamp': '2025-09-10 02:25:55.799731', 'step': 4736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:55.832859', 'step': 4736, 'epoch': 3} {'type': 'loss', 'content': 0.0007912717992439866, 'timestamp': '2025-09-10 02:25:55.845448', 'step': 4737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:55.878382', 'step': 4737, 'epoch': 3} {'type': 'loss', 'content': 8.05290910648182e-05, 'timestamp': '2025-09-10 02:25:55.885600', 'step': 4738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:55.916824', 'step': 4738, 'epoch': 3} {'type': 'loss', 'content': 0.0016422310145571828, 'timestamp': '2025-09-10 02:25:55.923786', 'step': 4739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:55.957047', 'step': 4739, 'epoch': 3} {'type': 'loss', 'content': 0.00033386718132533133, 'timestamp': '2025-09-10 02:25:55.984672', 'step': 4740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:56.018814', 'step': 4740, 'epoch': 3} {'type': 'loss', 'content': 0.0002576867409516126, 'timestamp': '2025-09-10 02:25:56.023115', 'step': 4741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:56.055754', 'step': 4741, 'epoch': 3} {'type': 'loss', 'content': 0.0020760188344866037, 'timestamp': '2025-09-10 02:25:56.058458', 'step': 4742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:56.090856', 'step': 4742, 'epoch': 3} {'type': 'loss', 'content': 0.004879082087427378, 'timestamp': '2025-09-10 02:25:56.098101', 'step': 4743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:25:56.132776', 'step': 4743, 'epoch': 3} {'type': 'loss', 'content': 4.860662738792598e-05, 'timestamp': '2025-09-10 02:25:56.167244', 'step': 4744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:56.200709', 'step': 4744, 'epoch': 3} {'type': 'loss', 'content': 0.0001819897734094411, 'timestamp': '2025-09-10 02:25:56.204983', 'step': 4745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:56.236803', 'step': 4745, 'epoch': 3} {'type': 'loss', 'content': 0.00019539693312253803, 'timestamp': '2025-09-10 02:25:56.243514', 'step': 4746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:56.286988', 'step': 4746, 'epoch': 3} {'type': 'loss', 'content': 0.0030586186330765486, 'timestamp': '2025-09-10 02:25:56.291110', 'step': 4747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:56.322545', 'step': 4747, 'epoch': 3} {'type': 'loss', 'content': 0.00042248849058523774, 'timestamp': '2025-09-10 02:25:56.351141', 'step': 4748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:56.382291', 'step': 4748, 'epoch': 3} {'type': 'loss', 'content': 0.0006272942409850657, 'timestamp': '2025-09-10 02:25:56.389844', 'step': 4749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:56.421653', 'step': 4749, 'epoch': 3} {'type': 'loss', 'content': 0.00040742545388638973, 'timestamp': '2025-09-10 02:25:56.425826', 'step': 4750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:56.458601', 'step': 4750, 'epoch': 3} {'type': 'loss', 'content': 0.00014489439490716904, 'timestamp': '2025-09-10 02:25:56.465546', 'step': 4751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:25:56.499456', 'step': 4751, 'epoch': 3} {'type': 'loss', 'content': 0.000770228507462889, 'timestamp': '2025-09-10 02:25:56.523257', 'step': 4752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:56.555302', 'step': 4752, 'epoch': 3} {'type': 'loss', 'content': 0.00037505757063627243, 'timestamp': '2025-09-10 02:25:56.557426', 'step': 4753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:56.587932', 'step': 4753, 'epoch': 3} {'type': 'loss', 'content': 0.00023457292991224676, 'timestamp': '2025-09-10 02:25:56.595023', 'step': 4754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:56.626289', 'step': 4754, 'epoch': 3} {'type': 'loss', 'content': 0.00568475853651762, 'timestamp': '2025-09-10 02:25:56.633673', 'step': 4755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:56.665423', 'step': 4755, 'epoch': 3} {'type': 'loss', 'content': 0.0008253042469732463, 'timestamp': '2025-09-10 02:25:56.690687', 'step': 4756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:56.722401', 'step': 4756, 'epoch': 3} {'type': 'loss', 'content': 0.0006686433334834874, 'timestamp': '2025-09-10 02:25:56.734993', 'step': 4757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:56.765610', 'step': 4757, 'epoch': 3} {'type': 'loss', 'content': 0.0004306059854570776, 'timestamp': '2025-09-10 02:25:56.772714', 'step': 4758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:56.802645', 'step': 4758, 'epoch': 3} {'type': 'loss', 'content': 0.03919557109475136, 'timestamp': '2025-09-10 02:25:56.806926', 'step': 4759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:56.837195', 'step': 4759, 'epoch': 3} {'type': 'loss', 'content': 0.0008901845430955291, 'timestamp': '2025-09-10 02:25:56.865124', 'step': 4760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:56.899149', 'step': 4760, 'epoch': 3} {'type': 'loss', 'content': 0.00022610726591665298, 'timestamp': '2025-09-10 02:25:56.906750', 'step': 4761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:56.940355', 'step': 4761, 'epoch': 3} {'type': 'loss', 'content': 6.875943654449657e-05, 'timestamp': '2025-09-10 02:25:56.947266', 'step': 4762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:56.978084', 'step': 4762, 'epoch': 3} {'type': 'loss', 'content': 0.0004053361772093922, 'timestamp': '2025-09-10 02:25:56.989159', 'step': 4763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:57.022371', 'step': 4763, 'epoch': 3} {'type': 'loss', 'content': 0.0025856548454612494, 'timestamp': '2025-09-10 02:25:57.054174', 'step': 4764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:57.085349', 'step': 4764, 'epoch': 3} {'type': 'loss', 'content': 0.00020959861285518855, 'timestamp': '2025-09-10 02:25:57.087517', 'step': 4765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 928], 'flops': 27527278844800}, 'timestamp': '2025-09-10 02:25:57.164501', 'step': 4765, 'epoch': 3} {'type': 'loss', 'content': 0.000836056366097182, 'timestamp': '2025-09-10 02:25:57.196102', 'step': 4766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:57.227009', 'step': 4766, 'epoch': 3} {'type': 'loss', 'content': 0.0007483740919269621, 'timestamp': '2025-09-10 02:25:57.231213', 'step': 4767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:57.261606', 'step': 4767, 'epoch': 3} {'type': 'loss', 'content': 0.00014812864537816495, 'timestamp': '2025-09-10 02:25:57.292657', 'step': 4768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:57.323573', 'step': 4768, 'epoch': 3} {'type': 'loss', 'content': 0.0001649035548325628, 'timestamp': '2025-09-10 02:25:57.328618', 'step': 4769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:25:57.367308', 'step': 4769, 'epoch': 3} {'type': 'loss', 'content': 2.814439358189702e-05, 'timestamp': '2025-09-10 02:25:57.382928', 'step': 4770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:25:57.413769', 'step': 4770, 'epoch': 3} {'type': 'loss', 'content': 0.0018101210007444024, 'timestamp': '2025-09-10 02:25:57.415992', 'step': 4771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:57.446752', 'step': 4771, 'epoch': 3} {'type': 'loss', 'content': 0.0005368964048102498, 'timestamp': '2025-09-10 02:25:57.474370', 'step': 4772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:57.504468', 'step': 4772, 'epoch': 3} {'type': 'loss', 'content': 0.0005379213253036141, 'timestamp': '2025-09-10 02:25:57.512288', 'step': 4773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:57.543048', 'step': 4773, 'epoch': 3} {'type': 'loss', 'content': 0.0007029054104350507, 'timestamp': '2025-09-10 02:25:57.550992', 'step': 4774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:57.583074', 'step': 4774, 'epoch': 3} {'type': 'loss', 'content': 0.017190443351864815, 'timestamp': '2025-09-10 02:25:57.595255', 'step': 4775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:57.627841', 'step': 4775, 'epoch': 3} {'type': 'loss', 'content': 0.0006791255436837673, 'timestamp': '2025-09-10 02:25:57.655695', 'step': 4776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:57.688970', 'step': 4776, 'epoch': 3} {'type': 'loss', 'content': 4.802838520845398e-05, 'timestamp': '2025-09-10 02:25:57.694277', 'step': 4777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:57.726991', 'step': 4777, 'epoch': 3} {'type': 'loss', 'content': 0.0004995018825866282, 'timestamp': '2025-09-10 02:25:57.734016', 'step': 4778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:25:57.774778', 'step': 4778, 'epoch': 3} {'type': 'loss', 'content': 3.5099070373689756e-05, 'timestamp': '2025-09-10 02:25:57.790648', 'step': 4779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:57.822302', 'step': 4779, 'epoch': 3} {'type': 'loss', 'content': 0.00015079900913406163, 'timestamp': '2025-09-10 02:25:57.850184', 'step': 4780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:57.880239', 'step': 4780, 'epoch': 3} {'type': 'loss', 'content': 0.0010758963180705905, 'timestamp': '2025-09-10 02:25:57.882597', 'step': 4781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:57.912637', 'step': 4781, 'epoch': 3} {'type': 'loss', 'content': 0.022532150149345398, 'timestamp': '2025-09-10 02:25:57.917341', 'step': 4782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:57.949411', 'step': 4782, 'epoch': 3} {'type': 'loss', 'content': 0.006792863365262747, 'timestamp': '2025-09-10 02:25:57.957123', 'step': 4783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:25:57.996736', 'step': 4783, 'epoch': 3} {'type': 'loss', 'content': 0.00010572600876912475, 'timestamp': '2025-09-10 02:25:58.033523', 'step': 4784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:58.070175', 'step': 4784, 'epoch': 3} {'type': 'loss', 'content': 0.00010956094047287479, 'timestamp': '2025-09-10 02:25:58.075183', 'step': 4785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:25:58.106182', 'step': 4785, 'epoch': 3} {'type': 'loss', 'content': 0.000302365719107911, 'timestamp': '2025-09-10 02:25:58.118711', 'step': 4786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:58.148933', 'step': 4786, 'epoch': 3} {'type': 'loss', 'content': 0.022172143682837486, 'timestamp': '2025-09-10 02:25:58.156099', 'step': 4787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:58.188968', 'step': 4787, 'epoch': 3} {'type': 'loss', 'content': 9.383214637637138e-05, 'timestamp': '2025-09-10 02:25:58.217333', 'step': 4788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:58.248652', 'step': 4788, 'epoch': 3} {'type': 'loss', 'content': 0.0019239891553297639, 'timestamp': '2025-09-10 02:25:58.259436', 'step': 4789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:25:58.301360', 'step': 4789, 'epoch': 3} {'type': 'loss', 'content': 0.00011966370948357508, 'timestamp': '2025-09-10 02:25:58.316977', 'step': 4790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:58.350131', 'step': 4790, 'epoch': 3} {'type': 'loss', 'content': 0.00045444341958500445, 'timestamp': '2025-09-10 02:25:58.356818', 'step': 4791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:25:58.391166', 'step': 4791, 'epoch': 3} {'type': 'loss', 'content': 0.00012033795792376623, 'timestamp': '2025-09-10 02:25:58.415950', 'step': 4792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:58.448459', 'step': 4792, 'epoch': 3} {'type': 'loss', 'content': 0.0009351377957500517, 'timestamp': '2025-09-10 02:25:58.461022', 'step': 4793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:25:58.498355', 'step': 4793, 'epoch': 3} {'type': 'loss', 'content': 0.011824551038444042, 'timestamp': '2025-09-10 02:25:58.513914', 'step': 4794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:58.544714', 'step': 4794, 'epoch': 3} {'type': 'loss', 'content': 0.004735906142741442, 'timestamp': '2025-09-10 02:25:58.549049', 'step': 4795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:58.580216', 'step': 4795, 'epoch': 3} {'type': 'loss', 'content': 0.010355941019952297, 'timestamp': '2025-09-10 02:25:58.613219', 'step': 4796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:58.653939', 'step': 4796, 'epoch': 3} {'type': 'loss', 'content': 0.03133935481309891, 'timestamp': '2025-09-10 02:25:58.661346', 'step': 4797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:58.695774', 'step': 4797, 'epoch': 3} {'type': 'loss', 'content': 5.2887880883645266e-05, 'timestamp': '2025-09-10 02:25:58.700167', 'step': 4798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:58.739637', 'step': 4798, 'epoch': 3} {'type': 'loss', 'content': 0.0006416105316020548, 'timestamp': '2025-09-10 02:25:58.746381', 'step': 4799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:25:58.783249', 'step': 4799, 'epoch': 3} {'type': 'loss', 'content': 0.0007519474602304399, 'timestamp': '2025-09-10 02:25:58.817805', 'step': 4800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:58.851422', 'step': 4800, 'epoch': 3} {'type': 'loss', 'content': 0.0013969124993309379, 'timestamp': '2025-09-10 02:25:58.853678', 'step': 4801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:58.885301', 'step': 4801, 'epoch': 3} {'type': 'loss', 'content': 0.00015225332754198462, 'timestamp': '2025-09-10 02:25:58.892596', 'step': 4802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 14712978242368}, 'timestamp': '2025-09-10 02:25:58.935774', 'step': 4802, 'epoch': 3} {'type': 'loss', 'content': 0.002063736552372575, 'timestamp': '2025-09-10 02:25:58.953398', 'step': 4803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:58.994738', 'step': 4803, 'epoch': 3} {'type': 'loss', 'content': 0.00044029252603650093, 'timestamp': '2025-09-10 02:25:59.022422', 'step': 4804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:59.063004', 'step': 4804, 'epoch': 3} {'type': 'loss', 'content': 0.00024363842385355383, 'timestamp': '2025-09-10 02:25:59.067300', 'step': 4805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:25:59.104097', 'step': 4805, 'epoch': 3} {'type': 'loss', 'content': 2.4000339180929586e-05, 'timestamp': '2025-09-10 02:25:59.117432', 'step': 4806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:59.160602', 'step': 4806, 'epoch': 3} {'type': 'loss', 'content': 0.027750907465815544, 'timestamp': '2025-09-10 02:25:59.165189', 'step': 4807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:59.198218', 'step': 4807, 'epoch': 3} {'type': 'loss', 'content': 7.746334449620917e-05, 'timestamp': '2025-09-10 02:25:59.226048', 'step': 4808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:25:59.267682', 'step': 4808, 'epoch': 3} {'type': 'loss', 'content': 0.004865474067628384, 'timestamp': '2025-09-10 02:25:59.284691', 'step': 4809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:59.318826', 'step': 4809, 'epoch': 3} {'type': 'loss', 'content': 0.0006346892914734781, 'timestamp': '2025-09-10 02:25:59.325602', 'step': 4810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:59.358416', 'step': 4810, 'epoch': 3} {'type': 'loss', 'content': 0.0009411797509528697, 'timestamp': '2025-09-10 02:25:59.365575', 'step': 4811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:59.398191', 'step': 4811, 'epoch': 3} {'type': 'loss', 'content': 7.868631655583158e-05, 'timestamp': '2025-09-10 02:25:59.423313', 'step': 4812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:25:59.454662', 'step': 4812, 'epoch': 3} {'type': 'loss', 'content': 0.00019831575627904385, 'timestamp': '2025-09-10 02:25:59.457327', 'step': 4813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:59.493006', 'step': 4813, 'epoch': 3} {'type': 'loss', 'content': 0.005292465444654226, 'timestamp': '2025-09-10 02:25:59.500034', 'step': 4814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:25:59.531461', 'step': 4814, 'epoch': 3} {'type': 'loss', 'content': 0.013571852818131447, 'timestamp': '2025-09-10 02:25:59.534195', 'step': 4815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:59.566265', 'step': 4815, 'epoch': 3} {'type': 'loss', 'content': 0.0036411576438695192, 'timestamp': '2025-09-10 02:25:59.594002', 'step': 4816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:59.628397', 'step': 4816, 'epoch': 3} {'type': 'loss', 'content': 0.03516482934355736, 'timestamp': '2025-09-10 02:25:59.633370', 'step': 4817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:25:59.669479', 'step': 4817, 'epoch': 3} {'type': 'loss', 'content': 8.574579987907782e-05, 'timestamp': '2025-09-10 02:25:59.681420', 'step': 4818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:25:59.714869', 'step': 4818, 'epoch': 3} {'type': 'loss', 'content': 4.725396502180956e-05, 'timestamp': '2025-09-10 02:25:59.725620', 'step': 4819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:25:59.759943', 'step': 4819, 'epoch': 3} {'type': 'loss', 'content': 0.0006414660601876676, 'timestamp': '2025-09-10 02:25:59.787921', 'step': 4820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:25:59.821105', 'step': 4820, 'epoch': 3} {'type': 'loss', 'content': 0.026372602209448814, 'timestamp': '2025-09-10 02:25:59.825502', 'step': 4821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:25:59.859168', 'step': 4821, 'epoch': 3} {'type': 'loss', 'content': 0.004305239766836166, 'timestamp': '2025-09-10 02:25:59.866853', 'step': 4822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:25:59.899931', 'step': 4822, 'epoch': 3} {'type': 'loss', 'content': 0.001554366433992982, 'timestamp': '2025-09-10 02:25:59.909434', 'step': 4823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:25:59.943662', 'step': 4823, 'epoch': 3} {'type': 'loss', 'content': 1.843928112066351e-05, 'timestamp': '2025-09-10 02:25:59.971429', 'step': 4824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:00.006017', 'step': 4824, 'epoch': 3} {'type': 'loss', 'content': 0.00037253022310324013, 'timestamp': '2025-09-10 02:26:00.016360', 'step': 4825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:00.047393', 'step': 4825, 'epoch': 3} {'type': 'loss', 'content': 0.00015221700596157461, 'timestamp': '2025-09-10 02:26:00.054706', 'step': 4826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:00.087117', 'step': 4826, 'epoch': 3} {'type': 'loss', 'content': 0.0029021056834608316, 'timestamp': '2025-09-10 02:26:00.094410', 'step': 4827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:00.128880', 'step': 4827, 'epoch': 3} {'type': 'loss', 'content': 0.008609069511294365, 'timestamp': '2025-09-10 02:26:00.156657', 'step': 4828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:00.190329', 'step': 4828, 'epoch': 3} {'type': 'loss', 'content': 0.01999555341899395, 'timestamp': '2025-09-10 02:26:00.195215', 'step': 4829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:00.226485', 'step': 4829, 'epoch': 3} {'type': 'loss', 'content': 0.0010207198793068528, 'timestamp': '2025-09-10 02:26:00.233313', 'step': 4830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:26:00.270031', 'step': 4830, 'epoch': 3} {'type': 'loss', 'content': 0.00045846131979487836, 'timestamp': '2025-09-10 02:26:00.283364', 'step': 4831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:00.316424', 'step': 4831, 'epoch': 3} {'type': 'loss', 'content': 0.019452109932899475, 'timestamp': '2025-09-10 02:26:00.344133', 'step': 4832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:00.379059', 'step': 4832, 'epoch': 3} {'type': 'loss', 'content': 0.028431635349988937, 'timestamp': '2025-09-10 02:26:00.389300', 'step': 4833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:26:00.425448', 'step': 4833, 'epoch': 3} {'type': 'loss', 'content': 0.0021825884468853474, 'timestamp': '2025-09-10 02:26:00.438782', 'step': 4834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:00.472698', 'step': 4834, 'epoch': 3} {'type': 'loss', 'content': 0.0035446875263005495, 'timestamp': '2025-09-10 02:26:00.484427', 'step': 4835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:00.515445', 'step': 4835, 'epoch': 3} {'type': 'loss', 'content': 0.008904158137738705, 'timestamp': '2025-09-10 02:26:00.546371', 'step': 4836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:00.577790', 'step': 4836, 'epoch': 3} {'type': 'loss', 'content': 0.025139760226011276, 'timestamp': '2025-09-10 02:26:00.582495', 'step': 4837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:00.613669', 'step': 4837, 'epoch': 3} {'type': 'loss', 'content': 0.02474926970899105, 'timestamp': '2025-09-10 02:26:00.624621', 'step': 4838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:00.655331', 'step': 4838, 'epoch': 3} {'type': 'loss', 'content': 0.005889459978789091, 'timestamp': '2025-09-10 02:26:00.658335', 'step': 4839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:00.690023', 'step': 4839, 'epoch': 3} {'type': 'loss', 'content': 0.0004479756171349436, 'timestamp': '2025-09-10 02:26:00.721142', 'step': 4840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:26:00.754422', 'step': 4840, 'epoch': 3} {'type': 'loss', 'content': 0.00183139240834862, 'timestamp': '2025-09-10 02:26:00.767786', 'step': 4841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:00.800060', 'step': 4841, 'epoch': 3} {'type': 'loss', 'content': 0.0007262559956870973, 'timestamp': '2025-09-10 02:26:00.802493', 'step': 4842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:00.833909', 'step': 4842, 'epoch': 3} {'type': 'loss', 'content': 0.0002282061759615317, 'timestamp': '2025-09-10 02:26:00.846515', 'step': 4843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:00.876802', 'step': 4843, 'epoch': 3} {'type': 'loss', 'content': 0.00018490191723685712, 'timestamp': '2025-09-10 02:26:00.900295', 'step': 4844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:00.930704', 'step': 4844, 'epoch': 3} {'type': 'loss', 'content': 0.037172622978687286, 'timestamp': '2025-09-10 02:26:00.935978', 'step': 4845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:00.966702', 'step': 4845, 'epoch': 3} {'type': 'loss', 'content': 0.0007538548088632524, 'timestamp': '2025-09-10 02:26:00.970742', 'step': 4846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:01.001708', 'step': 4846, 'epoch': 3} {'type': 'loss', 'content': 0.003542929422110319, 'timestamp': '2025-09-10 02:26:01.009177', 'step': 4847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:01.039473', 'step': 4847, 'epoch': 3} {'type': 'loss', 'content': 0.0006386330351233482, 'timestamp': '2025-09-10 02:26:01.068042', 'step': 4848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:01.098644', 'step': 4848, 'epoch': 3} {'type': 'loss', 'content': 0.0007097636116668582, 'timestamp': '2025-09-10 02:26:01.108611', 'step': 4849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:01.139356', 'step': 4849, 'epoch': 3} {'type': 'loss', 'content': 0.0003923158801626414, 'timestamp': '2025-09-10 02:26:01.146692', 'step': 4850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:01.176912', 'step': 4850, 'epoch': 3} {'type': 'loss', 'content': 0.001541634788736701, 'timestamp': '2025-09-10 02:26:01.189033', 'step': 4851, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:26:11.251927', 'step': 4851, 'epoch': 3} {'type': 'pplx', 'content': 22527274.187912628, 'timestamp': '2025-09-10 02:26:11.255094', 'step': 4851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:11.285937', 'step': 4851, 'epoch': 3} {'type': 'loss', 'content': 0.0015954956179484725, 'timestamp': '2025-09-10 02:26:11.312678', 'step': 4852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:11.346481', 'step': 4852, 'epoch': 3} {'type': 'loss', 'content': 0.00044376106234267354, 'timestamp': '2025-09-10 02:26:11.351218', 'step': 4853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:11.383233', 'step': 4853, 'epoch': 3} {'type': 'loss', 'content': 0.0005625728517770767, 'timestamp': '2025-09-10 02:26:11.390990', 'step': 4854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:26:11.428386', 'step': 4854, 'epoch': 3} {'type': 'loss', 'content': 0.009019298478960991, 'timestamp': '2025-09-10 02:26:11.442133', 'step': 4855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:11.473458', 'step': 4855, 'epoch': 3} {'type': 'loss', 'content': 0.00413868110626936, 'timestamp': '2025-09-10 02:26:11.501704', 'step': 4856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:11.532532', 'step': 4856, 'epoch': 3} {'type': 'loss', 'content': 0.003734805155545473, 'timestamp': '2025-09-10 02:26:11.537572', 'step': 4857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:11.568519', 'step': 4857, 'epoch': 3} {'type': 'loss', 'content': 0.00028699575341306627, 'timestamp': '2025-09-10 02:26:11.576305', 'step': 4858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:11.607157', 'step': 4858, 'epoch': 3} {'type': 'loss', 'content': 0.00013391379616223276, 'timestamp': '2025-09-10 02:26:11.617438', 'step': 4859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:11.649472', 'step': 4859, 'epoch': 3} {'type': 'loss', 'content': 0.008628031238913536, 'timestamp': '2025-09-10 02:26:11.677213', 'step': 4860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:11.711931', 'step': 4860, 'epoch': 3} {'type': 'loss', 'content': 0.0020894031040370464, 'timestamp': '2025-09-10 02:26:11.719440', 'step': 4861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:11.752557', 'step': 4861, 'epoch': 3} {'type': 'loss', 'content': 0.005519864149391651, 'timestamp': '2025-09-10 02:26:11.760336', 'step': 4862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:11.792765', 'step': 4862, 'epoch': 3} {'type': 'loss', 'content': 0.005144777707755566, 'timestamp': '2025-09-10 02:26:11.799769', 'step': 4863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:11.830246', 'step': 4863, 'epoch': 3} {'type': 'loss', 'content': 0.00043783331057056785, 'timestamp': '2025-09-10 02:26:11.857871', 'step': 4864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:26:11.890550', 'step': 4864, 'epoch': 3} {'type': 'loss', 'content': 0.005862680729478598, 'timestamp': '2025-09-10 02:26:11.903662', 'step': 4865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:11.934633', 'step': 4865, 'epoch': 3} {'type': 'loss', 'content': 0.007736521307379007, 'timestamp': '2025-09-10 02:26:11.945036', 'step': 4866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:11.975688', 'step': 4866, 'epoch': 3} {'type': 'loss', 'content': 0.004861840512603521, 'timestamp': '2025-09-10 02:26:11.977952', 'step': 4867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:12.008274', 'step': 4867, 'epoch': 3} {'type': 'loss', 'content': 0.005645510274916887, 'timestamp': '2025-09-10 02:26:12.036047', 'step': 4868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:12.067329', 'step': 4868, 'epoch': 3} {'type': 'loss', 'content': 0.002813952276483178, 'timestamp': '2025-09-10 02:26:12.077850', 'step': 4869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:12.108352', 'step': 4869, 'epoch': 3} {'type': 'loss', 'content': 0.005273285787552595, 'timestamp': '2025-09-10 02:26:12.112325', 'step': 4870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:12.143472', 'step': 4870, 'epoch': 3} {'type': 'loss', 'content': 0.008787405677139759, 'timestamp': '2025-09-10 02:26:12.145799', 'step': 4871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:12.178570', 'step': 4871, 'epoch': 3} {'type': 'loss', 'content': 0.001541761914268136, 'timestamp': '2025-09-10 02:26:12.210555', 'step': 4872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:12.245477', 'step': 4872, 'epoch': 3} {'type': 'loss', 'content': 0.011291869916021824, 'timestamp': '2025-09-10 02:26:12.253776', 'step': 4873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:12.286054', 'step': 4873, 'epoch': 3} {'type': 'loss', 'content': 0.001998367952182889, 'timestamp': '2025-09-10 02:26:12.293055', 'step': 4874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:12.329385', 'step': 4874, 'epoch': 3} {'type': 'loss', 'content': 0.005770617164671421, 'timestamp': '2025-09-10 02:26:12.333776', 'step': 4875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:12.365533', 'step': 4875, 'epoch': 3} {'type': 'loss', 'content': 0.0012409423943609, 'timestamp': '2025-09-10 02:26:12.390766', 'step': 4876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:12.424494', 'step': 4876, 'epoch': 3} {'type': 'loss', 'content': 0.001259890734218061, 'timestamp': '2025-09-10 02:26:12.428830', 'step': 4877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:12.459636', 'step': 4877, 'epoch': 3} {'type': 'loss', 'content': 0.0013752224622294307, 'timestamp': '2025-09-10 02:26:12.466402', 'step': 4878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:12.498281', 'step': 4878, 'epoch': 3} {'type': 'loss', 'content': 0.00954064168035984, 'timestamp': '2025-09-10 02:26:12.504671', 'step': 4879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:12.538044', 'step': 4879, 'epoch': 3} {'type': 'loss', 'content': 0.0020668611396104097, 'timestamp': '2025-09-10 02:26:12.568741', 'step': 4880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:12.602992', 'step': 4880, 'epoch': 3} {'type': 'loss', 'content': 0.00032076804200187325, 'timestamp': '2025-09-10 02:26:12.612119', 'step': 4881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:26:12.649546', 'step': 4881, 'epoch': 3} {'type': 'loss', 'content': 0.0008431184687651694, 'timestamp': '2025-09-10 02:26:12.663261', 'step': 4882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:26:12.705884', 'step': 4882, 'epoch': 3} {'type': 'loss', 'content': 0.004074872005730867, 'timestamp': '2025-09-10 02:26:12.719281', 'step': 4883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:12.752046', 'step': 4883, 'epoch': 3} {'type': 'loss', 'content': 0.002831391990184784, 'timestamp': '2025-09-10 02:26:12.776773', 'step': 4884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:12.808218', 'step': 4884, 'epoch': 3} {'type': 'loss', 'content': 0.004910766612738371, 'timestamp': '2025-09-10 02:26:12.810889', 'step': 4885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:12.841938', 'step': 4885, 'epoch': 3} {'type': 'loss', 'content': 0.009595355950295925, 'timestamp': '2025-09-10 02:26:12.854581', 'step': 4886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:26:12.888932', 'step': 4886, 'epoch': 3} {'type': 'loss', 'content': 0.013698582537472248, 'timestamp': '2025-09-10 02:26:12.902772', 'step': 4887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:12.933454', 'step': 4887, 'epoch': 3} {'type': 'loss', 'content': 0.0003295644710306078, 'timestamp': '2025-09-10 02:26:12.958026', 'step': 4888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:12.988869', 'step': 4888, 'epoch': 3} {'type': 'loss', 'content': 5.0092607125407085e-05, 'timestamp': '2025-09-10 02:26:12.993445', 'step': 4889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:13.026601', 'step': 4889, 'epoch': 3} {'type': 'loss', 'content': 0.0013606924330815673, 'timestamp': '2025-09-10 02:26:13.039119', 'step': 4890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:13.070766', 'step': 4890, 'epoch': 3} {'type': 'loss', 'content': 0.0027366813737899065, 'timestamp': '2025-09-10 02:26:13.077822', 'step': 4891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 18035204324480}, 'timestamp': '2025-09-10 02:26:13.131591', 'step': 4891, 'epoch': 3} {'type': 'loss', 'content': 0.004872309975326061, 'timestamp': '2025-09-10 02:26:13.173871', 'step': 4892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:13.208531', 'step': 4892, 'epoch': 3} {'type': 'loss', 'content': 0.0004739653959404677, 'timestamp': '2025-09-10 02:26:13.217311', 'step': 4893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:13.249819', 'step': 4893, 'epoch': 3} {'type': 'loss', 'content': 0.0016823039622977376, 'timestamp': '2025-09-10 02:26:13.254188', 'step': 4894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:13.285267', 'step': 4894, 'epoch': 3} {'type': 'loss', 'content': 0.004189238417893648, 'timestamp': '2025-09-10 02:26:13.289701', 'step': 4895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:26:13.334340', 'step': 4895, 'epoch': 3} {'type': 'loss', 'content': 0.0009055934497155249, 'timestamp': '2025-09-10 02:26:13.369012', 'step': 4896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:26:13.403183', 'step': 4896, 'epoch': 3} {'type': 'loss', 'content': 0.0026929269079118967, 'timestamp': '2025-09-10 02:26:13.416504', 'step': 4897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:13.447231', 'step': 4897, 'epoch': 3} {'type': 'loss', 'content': 0.0013669952750205994, 'timestamp': '2025-09-10 02:26:13.451788', 'step': 4898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:13.483646', 'step': 4898, 'epoch': 3} {'type': 'loss', 'content': 0.0022419628221541643, 'timestamp': '2025-09-10 02:26:13.490450', 'step': 4899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:13.523859', 'step': 4899, 'epoch': 3} {'type': 'loss', 'content': 0.0009813571814447641, 'timestamp': '2025-09-10 02:26:13.551769', 'step': 4900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:13.583180', 'step': 4900, 'epoch': 3} {'type': 'loss', 'content': 0.0005111963837407529, 'timestamp': '2025-09-10 02:26:13.586698', 'step': 4901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:13.617822', 'step': 4901, 'epoch': 3} {'type': 'loss', 'content': 0.018179837614297867, 'timestamp': '2025-09-10 02:26:13.628203', 'step': 4902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:13.658289', 'step': 4902, 'epoch': 3} {'type': 'loss', 'content': 0.0024603954516351223, 'timestamp': '2025-09-10 02:26:13.662869', 'step': 4903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:13.694489', 'step': 4903, 'epoch': 3} {'type': 'loss', 'content': 7.283476588781923e-05, 'timestamp': '2025-09-10 02:26:13.723154', 'step': 4904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:13.753868', 'step': 4904, 'epoch': 3} {'type': 'loss', 'content': 0.0006378447869792581, 'timestamp': '2025-09-10 02:26:13.758616', 'step': 4905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:26:13.793601', 'step': 4905, 'epoch': 3} {'type': 'loss', 'content': 0.005772311706095934, 'timestamp': '2025-09-10 02:26:13.807551', 'step': 4906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:13.841772', 'step': 4906, 'epoch': 3} {'type': 'loss', 'content': 0.00017352063150610775, 'timestamp': '2025-09-10 02:26:13.849465', 'step': 4907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:13.880186', 'step': 4907, 'epoch': 3} {'type': 'loss', 'content': 0.0007246700115501881, 'timestamp': '2025-09-10 02:26:13.905590', 'step': 4908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:13.937751', 'step': 4908, 'epoch': 3} {'type': 'loss', 'content': 0.0003136695013381541, 'timestamp': '2025-09-10 02:26:13.942598', 'step': 4909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:13.974113', 'step': 4909, 'epoch': 3} {'type': 'loss', 'content': 0.005124423187226057, 'timestamp': '2025-09-10 02:26:13.982009', 'step': 4910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:14.013185', 'step': 4910, 'epoch': 3} {'type': 'loss', 'content': 0.0003622827643994242, 'timestamp': '2025-09-10 02:26:14.023544', 'step': 4911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:14.054555', 'step': 4911, 'epoch': 3} {'type': 'loss', 'content': 0.01249981764703989, 'timestamp': '2025-09-10 02:26:14.082638', 'step': 4912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:14.114302', 'step': 4912, 'epoch': 3} {'type': 'loss', 'content': 0.0059426832012832165, 'timestamp': '2025-09-10 02:26:14.119050', 'step': 4913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:14.150381', 'step': 4913, 'epoch': 3} {'type': 'loss', 'content': 0.00011479367094580084, 'timestamp': '2025-09-10 02:26:14.161544', 'step': 4914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:14.191668', 'step': 4914, 'epoch': 3} {'type': 'loss', 'content': 0.0021279591601341963, 'timestamp': '2025-09-10 02:26:14.198639', 'step': 4915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:14.229407', 'step': 4915, 'epoch': 3} {'type': 'loss', 'content': 4.092241215403192e-05, 'timestamp': '2025-09-10 02:26:14.257786', 'step': 4916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:14.288854', 'step': 4916, 'epoch': 3} {'type': 'loss', 'content': 0.00016833031259011477, 'timestamp': '2025-09-10 02:26:14.293904', 'step': 4917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:14.327484', 'step': 4917, 'epoch': 3} {'type': 'loss', 'content': 0.000857060425914824, 'timestamp': '2025-09-10 02:26:14.335234', 'step': 4918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:14.366296', 'step': 4918, 'epoch': 3} {'type': 'loss', 'content': 4.280987195670605e-05, 'timestamp': '2025-09-10 02:26:14.373922', 'step': 4919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:14.404028', 'step': 4919, 'epoch': 3} {'type': 'loss', 'content': 0.001588566112332046, 'timestamp': '2025-09-10 02:26:14.432402', 'step': 4920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:14.462073', 'step': 4920, 'epoch': 3} {'type': 'loss', 'content': 0.03219066932797432, 'timestamp': '2025-09-10 02:26:14.466695', 'step': 4921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:14.497215', 'step': 4921, 'epoch': 3} {'type': 'loss', 'content': 0.00037080320180393755, 'timestamp': '2025-09-10 02:26:14.509609', 'step': 4922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:14.539847', 'step': 4922, 'epoch': 3} {'type': 'loss', 'content': 0.00011398802598705515, 'timestamp': '2025-09-10 02:26:14.547735', 'step': 4923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:14.578237', 'step': 4923, 'epoch': 3} {'type': 'loss', 'content': 0.00020103438873775303, 'timestamp': '2025-09-10 02:26:14.603447', 'step': 4924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:14.635658', 'step': 4924, 'epoch': 3} {'type': 'loss', 'content': 0.0020608811173588037, 'timestamp': '2025-09-10 02:26:14.640856', 'step': 4925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:14.671556', 'step': 4925, 'epoch': 3} {'type': 'loss', 'content': 6.329066673060879e-05, 'timestamp': '2025-09-10 02:26:14.675957', 'step': 4926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:14.706195', 'step': 4926, 'epoch': 3} {'type': 'loss', 'content': 0.008835741318762302, 'timestamp': '2025-09-10 02:26:14.710912', 'step': 4927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:14.741722', 'step': 4927, 'epoch': 3} {'type': 'loss', 'content': 0.00034493012935854495, 'timestamp': '2025-09-10 02:26:14.766534', 'step': 4928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:14.796398', 'step': 4928, 'epoch': 3} {'type': 'loss', 'content': 0.003803166327998042, 'timestamp': '2025-09-10 02:26:14.801450', 'step': 4929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:14.832599', 'step': 4929, 'epoch': 3} {'type': 'loss', 'content': 0.00011779103806475177, 'timestamp': '2025-09-10 02:26:14.839344', 'step': 4930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:14.869905', 'step': 4930, 'epoch': 3} {'type': 'loss', 'content': 0.0015055211260914803, 'timestamp': '2025-09-10 02:26:14.874259', 'step': 4931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:26:14.907594', 'step': 4931, 'epoch': 3} {'type': 'loss', 'content': 0.0029439402278512716, 'timestamp': '2025-09-10 02:26:14.941862', 'step': 4932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:14.973346', 'step': 4932, 'epoch': 3} {'type': 'loss', 'content': 0.00018022792937699705, 'timestamp': '2025-09-10 02:26:14.977958', 'step': 4933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:15.009235', 'step': 4933, 'epoch': 3} {'type': 'loss', 'content': 0.008141091093420982, 'timestamp': '2025-09-10 02:26:15.021774', 'step': 4934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:26:15.055739', 'step': 4934, 'epoch': 3} {'type': 'loss', 'content': 0.00017168700287584215, 'timestamp': '2025-09-10 02:26:15.069567', 'step': 4935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:15.099959', 'step': 4935, 'epoch': 3} {'type': 'loss', 'content': 9.493528341408819e-05, 'timestamp': '2025-09-10 02:26:15.128009', 'step': 4936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:15.159409', 'step': 4936, 'epoch': 3} {'type': 'loss', 'content': 0.00017535120423417538, 'timestamp': '2025-09-10 02:26:15.164334', 'step': 4937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:15.195047', 'step': 4937, 'epoch': 3} {'type': 'loss', 'content': 0.00033908261684700847, 'timestamp': '2025-09-10 02:26:15.202033', 'step': 4938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 15662185694400}, 'timestamp': '2025-09-10 02:26:15.248703', 'step': 4938, 'epoch': 3} {'type': 'loss', 'content': 0.00015323214756790549, 'timestamp': '2025-09-10 02:26:15.267902', 'step': 4939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:15.301186', 'step': 4939, 'epoch': 3} {'type': 'loss', 'content': 0.0017902174731716514, 'timestamp': '2025-09-10 02:26:15.333311', 'step': 4940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:15.363730', 'step': 4940, 'epoch': 3} {'type': 'loss', 'content': 0.00014680066669825464, 'timestamp': '2025-09-10 02:26:15.372333', 'step': 4941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:15.402692', 'step': 4941, 'epoch': 3} {'type': 'loss', 'content': 0.00020298264280427247, 'timestamp': '2025-09-10 02:26:15.407122', 'step': 4942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:26:15.440189', 'step': 4942, 'epoch': 3} {'type': 'loss', 'content': 0.00014298847236204892, 'timestamp': '2025-09-10 02:26:15.453645', 'step': 4943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:15.485754', 'step': 4943, 'epoch': 3} {'type': 'loss', 'content': 0.00034119986230507493, 'timestamp': '2025-09-10 02:26:15.519215', 'step': 4944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:26:15.554520', 'step': 4944, 'epoch': 3} {'type': 'loss', 'content': 0.00043467155774123967, 'timestamp': '2025-09-10 02:26:15.569606', 'step': 4945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:15.600181', 'step': 4945, 'epoch': 3} {'type': 'loss', 'content': 0.00013701003626920283, 'timestamp': '2025-09-10 02:26:15.610359', 'step': 4946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:15.640935', 'step': 4946, 'epoch': 3} {'type': 'loss', 'content': 0.013453126884996891, 'timestamp': '2025-09-10 02:26:15.653151', 'step': 4947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:15.683338', 'step': 4947, 'epoch': 3} {'type': 'loss', 'content': 7.32469925424084e-05, 'timestamp': '2025-09-10 02:26:15.716450', 'step': 4948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:15.746478', 'step': 4948, 'epoch': 3} {'type': 'loss', 'content': 0.06172531098127365, 'timestamp': '2025-09-10 02:26:15.751195', 'step': 4949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:15.783122', 'step': 4949, 'epoch': 3} {'type': 'loss', 'content': 0.0008722272468730807, 'timestamp': '2025-09-10 02:26:15.790214', 'step': 4950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:15.820155', 'step': 4950, 'epoch': 3} {'type': 'loss', 'content': 0.04367053508758545, 'timestamp': '2025-09-10 02:26:15.827844', 'step': 4951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:15.857422', 'step': 4951, 'epoch': 3} {'type': 'loss', 'content': 0.0004010576813016087, 'timestamp': '2025-09-10 02:26:15.885360', 'step': 4952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:15.915490', 'step': 4952, 'epoch': 3} {'type': 'loss', 'content': 3.4004107874352485e-05, 'timestamp': '2025-09-10 02:26:15.920410', 'step': 4953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:26:15.957792', 'step': 4953, 'epoch': 3} {'type': 'loss', 'content': 0.007229152601212263, 'timestamp': '2025-09-10 02:26:15.973402', 'step': 4954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:16.004631', 'step': 4954, 'epoch': 3} {'type': 'loss', 'content': 5.849377703270875e-05, 'timestamp': '2025-09-10 02:26:16.012289', 'step': 4955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:16.042679', 'step': 4955, 'epoch': 3} {'type': 'loss', 'content': 0.004347801208496094, 'timestamp': '2025-09-10 02:26:16.070650', 'step': 4956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:16.101178', 'step': 4956, 'epoch': 3} {'type': 'loss', 'content': 0.0001966599520528689, 'timestamp': '2025-09-10 02:26:16.104169', 'step': 4957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:16.135122', 'step': 4957, 'epoch': 3} {'type': 'loss', 'content': 0.00021397892851382494, 'timestamp': '2025-09-10 02:26:16.137429', 'step': 4958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:16.168886', 'step': 4958, 'epoch': 3} {'type': 'loss', 'content': 0.00024242003564722836, 'timestamp': '2025-09-10 02:26:16.176400', 'step': 4959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:16.206448', 'step': 4959, 'epoch': 3} {'type': 'loss', 'content': 0.0008051989716477692, 'timestamp': '2025-09-10 02:26:16.234381', 'step': 4960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:16.265419', 'step': 4960, 'epoch': 3} {'type': 'loss', 'content': 0.0025039296597242355, 'timestamp': '2025-09-10 02:26:16.270792', 'step': 4961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:26:16.308395', 'step': 4961, 'epoch': 3} {'type': 'loss', 'content': 7.02980105415918e-05, 'timestamp': '2025-09-10 02:26:16.324261', 'step': 4962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:16.358251', 'step': 4962, 'epoch': 3} {'type': 'loss', 'content': 0.003547137137502432, 'timestamp': '2025-09-10 02:26:16.365900', 'step': 4963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:26:16.399984', 'step': 4963, 'epoch': 3} {'type': 'loss', 'content': 8.306949894176796e-05, 'timestamp': '2025-09-10 02:26:16.434207', 'step': 4964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:16.467886', 'step': 4964, 'epoch': 3} {'type': 'loss', 'content': 0.00013334887626115233, 'timestamp': '2025-09-10 02:26:16.473130', 'step': 4965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:16.504496', 'step': 4965, 'epoch': 3} {'type': 'loss', 'content': 5.8079971495317295e-05, 'timestamp': '2025-09-10 02:26:16.516848', 'step': 4966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:16.547233', 'step': 4966, 'epoch': 3} {'type': 'loss', 'content': 3.8230766222113743e-05, 'timestamp': '2025-09-10 02:26:16.551728', 'step': 4967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:16.582260', 'step': 4967, 'epoch': 3} {'type': 'loss', 'content': 0.0009217527112923563, 'timestamp': '2025-09-10 02:26:16.610964', 'step': 4968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:16.641445', 'step': 4968, 'epoch': 3} {'type': 'loss', 'content': 9.093651169678196e-05, 'timestamp': '2025-09-10 02:26:16.646147', 'step': 4969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:16.677638', 'step': 4969, 'epoch': 3} {'type': 'loss', 'content': 0.0006383144063875079, 'timestamp': '2025-09-10 02:26:16.685338', 'step': 4970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:16.716465', 'step': 4970, 'epoch': 3} {'type': 'loss', 'content': 0.00012200616765767336, 'timestamp': '2025-09-10 02:26:16.724215', 'step': 4971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:16.756583', 'step': 4971, 'epoch': 3} {'type': 'loss', 'content': 0.00018401713168714195, 'timestamp': '2025-09-10 02:26:16.784994', 'step': 4972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:16.815604', 'step': 4972, 'epoch': 3} {'type': 'loss', 'content': 3.1525421945843846e-05, 'timestamp': '2025-09-10 02:26:16.817894', 'step': 4973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:16.849803', 'step': 4973, 'epoch': 3} {'type': 'loss', 'content': 0.00023603920999448746, 'timestamp': '2025-09-10 02:26:16.857514', 'step': 4974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:26:16.891206', 'step': 4974, 'epoch': 3} {'type': 'loss', 'content': 0.0003533114795573056, 'timestamp': '2025-09-10 02:26:16.904618', 'step': 4975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:16.936270', 'step': 4975, 'epoch': 3} {'type': 'loss', 'content': 0.07041340321302414, 'timestamp': '2025-09-10 02:26:16.964307', 'step': 4976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:16.995447', 'step': 4976, 'epoch': 3} {'type': 'loss', 'content': 0.0019405941711738706, 'timestamp': '2025-09-10 02:26:17.000357', 'step': 4977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:17.034936', 'step': 4977, 'epoch': 3} {'type': 'loss', 'content': 0.0004297175328247249, 'timestamp': '2025-09-10 02:26:17.045363', 'step': 4978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:17.077867', 'step': 4978, 'epoch': 3} {'type': 'loss', 'content': 0.00030306234839372337, 'timestamp': '2025-09-10 02:26:17.085791', 'step': 4979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:26:17.119765', 'step': 4979, 'epoch': 3} {'type': 'loss', 'content': 0.000214372223126702, 'timestamp': '2025-09-10 02:26:17.154008', 'step': 4980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:17.185421', 'step': 4980, 'epoch': 3} {'type': 'loss', 'content': 0.0060597313567996025, 'timestamp': '2025-09-10 02:26:17.190972', 'step': 4981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:17.224555', 'step': 4981, 'epoch': 3} {'type': 'loss', 'content': 0.0004712548106908798, 'timestamp': '2025-09-10 02:26:17.226680', 'step': 4982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:17.259673', 'step': 4982, 'epoch': 3} {'type': 'loss', 'content': 0.018239330500364304, 'timestamp': '2025-09-10 02:26:17.266706', 'step': 4983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:17.298378', 'step': 4983, 'epoch': 3} {'type': 'loss', 'content': 5.246271757641807e-05, 'timestamp': '2025-09-10 02:26:17.331775', 'step': 4984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:17.362536', 'step': 4984, 'epoch': 3} {'type': 'loss', 'content': 0.000588534923736006, 'timestamp': '2025-09-10 02:26:17.367121', 'step': 4985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:17.397655', 'step': 4985, 'epoch': 3} {'type': 'loss', 'content': 0.04317157343029976, 'timestamp': '2025-09-10 02:26:17.405334', 'step': 4986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:17.436371', 'step': 4986, 'epoch': 3} {'type': 'loss', 'content': 0.0006422134465537965, 'timestamp': '2025-09-10 02:26:17.443245', 'step': 4987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:17.474316', 'step': 4987, 'epoch': 3} {'type': 'loss', 'content': 0.00034632027382031083, 'timestamp': '2025-09-10 02:26:17.502573', 'step': 4988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:17.535084', 'step': 4988, 'epoch': 3} {'type': 'loss', 'content': 8.68417409947142e-05, 'timestamp': '2025-09-10 02:26:17.539828', 'step': 4989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:17.572346', 'step': 4989, 'epoch': 3} {'type': 'loss', 'content': 0.00013398627925198525, 'timestamp': '2025-09-10 02:26:17.576500', 'step': 4990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:26:17.612524', 'step': 4990, 'epoch': 3} {'type': 'loss', 'content': 0.0029133365023881197, 'timestamp': '2025-09-10 02:26:17.626175', 'step': 4991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:17.658240', 'step': 4991, 'epoch': 3} {'type': 'loss', 'content': 8.629496005596593e-05, 'timestamp': '2025-09-10 02:26:17.686141', 'step': 4992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:17.718276', 'step': 4992, 'epoch': 3} {'type': 'loss', 'content': 0.0002693708229344338, 'timestamp': '2025-09-10 02:26:17.723486', 'step': 4993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:17.755369', 'step': 4993, 'epoch': 3} {'type': 'loss', 'content': 0.00012545159552246332, 'timestamp': '2025-09-10 02:26:17.763054', 'step': 4994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:17.795274', 'step': 4994, 'epoch': 3} {'type': 'loss', 'content': 0.0005542716244235635, 'timestamp': '2025-09-10 02:26:17.807429', 'step': 4995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:17.838080', 'step': 4995, 'epoch': 3} {'type': 'loss', 'content': 0.0003210590220987797, 'timestamp': '2025-09-10 02:26:17.861907', 'step': 4996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:17.893230', 'step': 4996, 'epoch': 3} {'type': 'loss', 'content': 7.695386011619121e-05, 'timestamp': '2025-09-10 02:26:17.898024', 'step': 4997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:17.928678', 'step': 4997, 'epoch': 3} {'type': 'loss', 'content': 0.0001586790895089507, 'timestamp': '2025-09-10 02:26:17.936042', 'step': 4998, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:26:28.250382', 'step': 4998, 'epoch': 3} {'type': 'pplx', 'content': 20015215.356057025, 'timestamp': '2025-09-10 02:26:28.261232', 'step': 4998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:28.310477', 'step': 4998, 'epoch': 3} {'type': 'loss', 'content': 0.0005191663512960076, 'timestamp': '2025-09-10 02:26:28.323762', 'step': 4999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:28.385226', 'step': 4999, 'epoch': 3} {'type': 'loss', 'content': 0.0004746699705719948, 'timestamp': '2025-09-10 02:26:28.412768', 'step': 5000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 5000', 'timestamp': '2025-09-10 02:26:33.179348', 'step': 5000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:33.211464', 'step': 5000, 'epoch': 3} {'type': 'loss', 'content': 0.0004966092528775334, 'timestamp': '2025-09-10 02:26:33.215172', 'step': 5001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:33.247140', 'step': 5001, 'epoch': 3} {'type': 'loss', 'content': 0.0005471754702739418, 'timestamp': '2025-09-10 02:26:33.253624', 'step': 5002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:33.286134', 'step': 5002, 'epoch': 3} {'type': 'loss', 'content': 0.00014292819832917303, 'timestamp': '2025-09-10 02:26:33.295863', 'step': 5003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:33.327101', 'step': 5003, 'epoch': 3} {'type': 'loss', 'content': 0.0004297696577850729, 'timestamp': '2025-09-10 02:26:33.354748', 'step': 5004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:33.386788', 'step': 5004, 'epoch': 3} {'type': 'loss', 'content': 8.166915358742699e-05, 'timestamp': '2025-09-10 02:26:33.396413', 'step': 5005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:33.427713', 'step': 5005, 'epoch': 3} {'type': 'loss', 'content': 0.00015276219346560538, 'timestamp': '2025-09-10 02:26:33.438578', 'step': 5006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:33.472065', 'step': 5006, 'epoch': 3} {'type': 'loss', 'content': 0.0017286234069615602, 'timestamp': '2025-09-10 02:26:33.482351', 'step': 5007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:33.512627', 'step': 5007, 'epoch': 3} {'type': 'loss', 'content': 0.00016119235078804195, 'timestamp': '2025-09-10 02:26:33.540535', 'step': 5008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:26:33.573284', 'step': 5008, 'epoch': 3} {'type': 'loss', 'content': 0.0011435570195317268, 'timestamp': '2025-09-10 02:26:33.586284', 'step': 5009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:26:33.621203', 'step': 5009, 'epoch': 3} {'type': 'loss', 'content': 0.0010838081361725926, 'timestamp': '2025-09-10 02:26:33.635195', 'step': 5010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:33.665549', 'step': 5010, 'epoch': 3} {'type': 'loss', 'content': 0.0003136697050649673, 'timestamp': '2025-09-10 02:26:33.672471', 'step': 5011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:33.703720', 'step': 5011, 'epoch': 3} {'type': 'loss', 'content': 5.7531153288437054e-05, 'timestamp': '2025-09-10 02:26:33.728906', 'step': 5012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:26:33.765485', 'step': 5012, 'epoch': 3} {'type': 'loss', 'content': 0.001678946428000927, 'timestamp': '2025-09-10 02:26:33.780640', 'step': 5013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:33.812596', 'step': 5013, 'epoch': 3} {'type': 'loss', 'content': 0.0004162538971286267, 'timestamp': '2025-09-10 02:26:33.815078', 'step': 5014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:33.847600', 'step': 5014, 'epoch': 3} {'type': 'loss', 'content': 0.0012148728128522635, 'timestamp': '2025-09-10 02:26:33.852067', 'step': 5015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:33.883868', 'step': 5015, 'epoch': 3} {'type': 'loss', 'content': 0.019105346873402596, 'timestamp': '2025-09-10 02:26:33.915046', 'step': 5016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:33.946080', 'step': 5016, 'epoch': 3} {'type': 'loss', 'content': 0.001212744740769267, 'timestamp': '2025-09-10 02:26:33.953972', 'step': 5017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:33.986413', 'step': 5017, 'epoch': 3} {'type': 'loss', 'content': 0.0005570516805164516, 'timestamp': '2025-09-10 02:26:33.993998', 'step': 5018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:34.024829', 'step': 5018, 'epoch': 3} {'type': 'loss', 'content': 0.004198791459202766, 'timestamp': '2025-09-10 02:26:34.035098', 'step': 5019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:34.066423', 'step': 5019, 'epoch': 3} {'type': 'loss', 'content': 0.0003597374598030001, 'timestamp': '2025-09-10 02:26:34.098448', 'step': 5020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:34.131175', 'step': 5020, 'epoch': 3} {'type': 'loss', 'content': 0.01109243929386139, 'timestamp': '2025-09-10 02:26:34.141461', 'step': 5021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:34.172970', 'step': 5021, 'epoch': 3} {'type': 'loss', 'content': 0.0014968998730182648, 'timestamp': '2025-09-10 02:26:34.180732', 'step': 5022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:34.212729', 'step': 5022, 'epoch': 3} {'type': 'loss', 'content': 0.00012139989848947152, 'timestamp': '2025-09-10 02:26:34.219631', 'step': 5023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:34.251172', 'step': 5023, 'epoch': 3} {'type': 'loss', 'content': 0.0002333705051569268, 'timestamp': '2025-09-10 02:26:34.277453', 'step': 5024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:34.308015', 'step': 5024, 'epoch': 3} {'type': 'loss', 'content': 0.0001543233374832198, 'timestamp': '2025-09-10 02:26:34.312696', 'step': 5025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:26:34.347950', 'step': 5025, 'epoch': 3} {'type': 'loss', 'content': 0.0005688256933353841, 'timestamp': '2025-09-10 02:26:34.361964', 'step': 5026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:26:34.397329', 'step': 5026, 'epoch': 3} {'type': 'loss', 'content': 0.00042403684346936643, 'timestamp': '2025-09-10 02:26:34.410955', 'step': 5027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:34.444474', 'step': 5027, 'epoch': 3} {'type': 'loss', 'content': 0.0002217363507952541, 'timestamp': '2025-09-10 02:26:34.477391', 'step': 5028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:34.508716', 'step': 5028, 'epoch': 3} {'type': 'loss', 'content': 0.0006893317913636565, 'timestamp': '2025-09-10 02:26:34.511043', 'step': 5029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:34.541016', 'step': 5029, 'epoch': 3} {'type': 'loss', 'content': 0.008850133046507835, 'timestamp': '2025-09-10 02:26:34.548002', 'step': 5030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:34.582927', 'step': 5030, 'epoch': 3} {'type': 'loss', 'content': 0.00032538484083488584, 'timestamp': '2025-09-10 02:26:34.590672', 'step': 5031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:34.626976', 'step': 5031, 'epoch': 3} {'type': 'loss', 'content': 0.00039912323700264096, 'timestamp': '2025-09-10 02:26:34.658831', 'step': 5032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:34.695229', 'step': 5032, 'epoch': 3} {'type': 'loss', 'content': 0.007538055535405874, 'timestamp': '2025-09-10 02:26:34.699593', 'step': 5033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:34.730572', 'step': 5033, 'epoch': 3} {'type': 'loss', 'content': 6.657992344116792e-05, 'timestamp': '2025-09-10 02:26:34.740878', 'step': 5034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:26:34.779662', 'step': 5034, 'epoch': 3} {'type': 'loss', 'content': 0.0010134560288861394, 'timestamp': '2025-09-10 02:26:34.795315', 'step': 5035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:34.827775', 'step': 5035, 'epoch': 3} {'type': 'loss', 'content': 0.0024202538188546896, 'timestamp': '2025-09-10 02:26:34.856235', 'step': 5036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:34.886920', 'step': 5036, 'epoch': 3} {'type': 'loss', 'content': 0.0004926332621835172, 'timestamp': '2025-09-10 02:26:34.897060', 'step': 5037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:34.928166', 'step': 5037, 'epoch': 3} {'type': 'loss', 'content': 0.0006999452598392963, 'timestamp': '2025-09-10 02:26:34.935741', 'step': 5038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:34.966631', 'step': 5038, 'epoch': 3} {'type': 'loss', 'content': 0.005028885323554277, 'timestamp': '2025-09-10 02:26:34.974266', 'step': 5039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:35.008987', 'step': 5039, 'epoch': 3} {'type': 'loss', 'content': 0.00027442388818599284, 'timestamp': '2025-09-10 02:26:35.034466', 'step': 5040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:35.066296', 'step': 5040, 'epoch': 3} {'type': 'loss', 'content': 0.005980245769023895, 'timestamp': '2025-09-10 02:26:35.068853', 'step': 5041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:35.099419', 'step': 5041, 'epoch': 3} {'type': 'loss', 'content': 0.00015993161650840193, 'timestamp': '2025-09-10 02:26:35.109597', 'step': 5042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:35.142793', 'step': 5042, 'epoch': 3} {'type': 'loss', 'content': 0.0005007135332562029, 'timestamp': '2025-09-10 02:26:35.147451', 'step': 5043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:35.180162', 'step': 5043, 'epoch': 3} {'type': 'loss', 'content': 0.0006699333898723125, 'timestamp': '2025-09-10 02:26:35.207921', 'step': 5044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:35.240430', 'step': 5044, 'epoch': 3} {'type': 'loss', 'content': 0.00028629746520891786, 'timestamp': '2025-09-10 02:26:35.245081', 'step': 5045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:35.277554', 'step': 5045, 'epoch': 3} {'type': 'loss', 'content': 0.0006818973342888057, 'timestamp': '2025-09-10 02:26:35.287372', 'step': 5046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:35.318835', 'step': 5046, 'epoch': 3} {'type': 'loss', 'content': 0.002404263708740473, 'timestamp': '2025-09-10 02:26:35.326633', 'step': 5047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:35.359340', 'step': 5047, 'epoch': 3} {'type': 'loss', 'content': 0.02151825651526451, 'timestamp': '2025-09-10 02:26:35.387052', 'step': 5048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:35.417860', 'step': 5048, 'epoch': 3} {'type': 'loss', 'content': 0.0003552958951331675, 'timestamp': '2025-09-10 02:26:35.422447', 'step': 5049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:35.452240', 'step': 5049, 'epoch': 3} {'type': 'loss', 'content': 0.00025253373314626515, 'timestamp': '2025-09-10 02:26:35.459190', 'step': 5050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:35.489346', 'step': 5050, 'epoch': 3} {'type': 'loss', 'content': 0.00040127182728610933, 'timestamp': '2025-09-10 02:26:35.493878', 'step': 5051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:35.524519', 'step': 5051, 'epoch': 3} {'type': 'loss', 'content': 0.0006088269292376935, 'timestamp': '2025-09-10 02:26:35.549424', 'step': 5052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:35.582289', 'step': 5052, 'epoch': 3} {'type': 'loss', 'content': 0.00042608132935129106, 'timestamp': '2025-09-10 02:26:35.585338', 'step': 5053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:35.617714', 'step': 5053, 'epoch': 3} {'type': 'loss', 'content': 0.0003567171806935221, 'timestamp': '2025-09-10 02:26:35.621800', 'step': 5054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:35.653481', 'step': 5054, 'epoch': 3} {'type': 'loss', 'content': 0.0005917255766689777, 'timestamp': '2025-09-10 02:26:35.663678', 'step': 5055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:35.694835', 'step': 5055, 'epoch': 3} {'type': 'loss', 'content': 0.00010045560338767245, 'timestamp': '2025-09-10 02:26:35.718582', 'step': 5056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:35.750639', 'step': 5056, 'epoch': 3} {'type': 'loss', 'content': 0.0001807038497645408, 'timestamp': '2025-09-10 02:26:35.755218', 'step': 5057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:35.790888', 'step': 5057, 'epoch': 3} {'type': 'loss', 'content': 0.001338689005933702, 'timestamp': '2025-09-10 02:26:35.798429', 'step': 5058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:35.833677', 'step': 5058, 'epoch': 3} {'type': 'loss', 'content': 0.0007186224684119225, 'timestamp': '2025-09-10 02:26:35.845938', 'step': 5059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:35.876959', 'step': 5059, 'epoch': 3} {'type': 'loss', 'content': 0.001849995693191886, 'timestamp': '2025-09-10 02:26:35.902008', 'step': 5060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:35.936006', 'step': 5060, 'epoch': 3} {'type': 'loss', 'content': 0.00040959817124530673, 'timestamp': '2025-09-10 02:26:35.944168', 'step': 5061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:35.978195', 'step': 5061, 'epoch': 3} {'type': 'loss', 'content': 0.0007337800343520939, 'timestamp': '2025-09-10 02:26:35.985672', 'step': 5062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:36.017177', 'step': 5062, 'epoch': 3} {'type': 'loss', 'content': 0.004766891244798899, 'timestamp': '2025-09-10 02:26:36.024108', 'step': 5063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:36.057466', 'step': 5063, 'epoch': 3} {'type': 'loss', 'content': 0.0009358166716992855, 'timestamp': '2025-09-10 02:26:36.085206', 'step': 5064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:36.119710', 'step': 5064, 'epoch': 3} {'type': 'loss', 'content': 0.0004207981692161411, 'timestamp': '2025-09-10 02:26:36.128101', 'step': 5065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:36.160614', 'step': 5065, 'epoch': 3} {'type': 'loss', 'content': 0.0005621476448141038, 'timestamp': '2025-09-10 02:26:36.167553', 'step': 5066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:36.198941', 'step': 5066, 'epoch': 3} {'type': 'loss', 'content': 0.0005337732727639377, 'timestamp': '2025-09-10 02:26:36.205778', 'step': 5067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:36.239845', 'step': 5067, 'epoch': 3} {'type': 'loss', 'content': 0.0006483304314315319, 'timestamp': '2025-09-10 02:26:36.268263', 'step': 5068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:36.299494', 'step': 5068, 'epoch': 3} {'type': 'loss', 'content': 0.0004982685786671937, 'timestamp': '2025-09-10 02:26:36.308801', 'step': 5069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:26:36.348134', 'step': 5069, 'epoch': 3} {'type': 'loss', 'content': 0.000861111271660775, 'timestamp': '2025-09-10 02:26:36.361512', 'step': 5070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:36.393080', 'step': 5070, 'epoch': 3} {'type': 'loss', 'content': 0.00017864606343209743, 'timestamp': '2025-09-10 02:26:36.403739', 'step': 5071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 19459015502528}, 'timestamp': '2025-09-10 02:26:36.460224', 'step': 5071, 'epoch': 3} {'type': 'loss', 'content': 0.0005434316699393094, 'timestamp': '2025-09-10 02:26:36.504467', 'step': 5072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:36.537697', 'step': 5072, 'epoch': 3} {'type': 'loss', 'content': 0.00014476195792667568, 'timestamp': '2025-09-10 02:26:36.542072', 'step': 5073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:36.576057', 'step': 5073, 'epoch': 3} {'type': 'loss', 'content': 0.00019232665363233536, 'timestamp': '2025-09-10 02:26:36.582602', 'step': 5074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:36.616440', 'step': 5074, 'epoch': 3} {'type': 'loss', 'content': 0.0004715229442808777, 'timestamp': '2025-09-10 02:26:36.624236', 'step': 5075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:36.657634', 'step': 5075, 'epoch': 3} {'type': 'loss', 'content': 0.00015088057261891663, 'timestamp': '2025-09-10 02:26:36.685386', 'step': 5076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:36.718883', 'step': 5076, 'epoch': 3} {'type': 'loss', 'content': 0.0018138757441192865, 'timestamp': '2025-09-10 02:26:36.723325', 'step': 5077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:36.756301', 'step': 5077, 'epoch': 3} {'type': 'loss', 'content': 0.00036036456003785133, 'timestamp': '2025-09-10 02:26:36.768752', 'step': 5078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:36.802805', 'step': 5078, 'epoch': 3} {'type': 'loss', 'content': 0.00014213754911907017, 'timestamp': '2025-09-10 02:26:36.809991', 'step': 5079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:36.842491', 'step': 5079, 'epoch': 3} {'type': 'loss', 'content': 0.0002783481031656265, 'timestamp': '2025-09-10 02:26:36.870966', 'step': 5080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:36.905049', 'step': 5080, 'epoch': 3} {'type': 'loss', 'content': 0.0010689280461519957, 'timestamp': '2025-09-10 02:26:36.909941', 'step': 5081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:36.943472', 'step': 5081, 'epoch': 3} {'type': 'loss', 'content': 0.004427754320204258, 'timestamp': '2025-09-10 02:26:36.950393', 'step': 5082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:36.981893', 'step': 5082, 'epoch': 3} {'type': 'loss', 'content': 0.0009393549407832325, 'timestamp': '2025-09-10 02:26:36.988419', 'step': 5083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:37.020810', 'step': 5083, 'epoch': 3} {'type': 'loss', 'content': 0.021152915433049202, 'timestamp': '2025-09-10 02:26:37.048368', 'step': 5084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:37.080873', 'step': 5084, 'epoch': 3} {'type': 'loss', 'content': 0.002822284121066332, 'timestamp': '2025-09-10 02:26:37.083125', 'step': 5085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:37.116842', 'step': 5085, 'epoch': 3} {'type': 'loss', 'content': 0.049205031245946884, 'timestamp': '2025-09-10 02:26:37.127741', 'step': 5086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:37.160653', 'step': 5086, 'epoch': 3} {'type': 'loss', 'content': 0.00024678107001818717, 'timestamp': '2025-09-10 02:26:37.167406', 'step': 5087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:37.203699', 'step': 5087, 'epoch': 3} {'type': 'loss', 'content': 0.005720262881368399, 'timestamp': '2025-09-10 02:26:37.231850', 'step': 5088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:37.263031', 'step': 5088, 'epoch': 3} {'type': 'loss', 'content': 0.001497715711593628, 'timestamp': '2025-09-10 02:26:37.267326', 'step': 5089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:37.302313', 'step': 5089, 'epoch': 3} {'type': 'loss', 'content': 0.002114651957526803, 'timestamp': '2025-09-10 02:26:37.314889', 'step': 5090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 15187581968384}, 'timestamp': '2025-09-10 02:26:37.356579', 'step': 5090, 'epoch': 3} {'type': 'loss', 'content': 0.011306433007121086, 'timestamp': '2025-09-10 02:26:37.374278', 'step': 5091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:37.404869', 'step': 5091, 'epoch': 3} {'type': 'loss', 'content': 0.004676491022109985, 'timestamp': '2025-09-10 02:26:37.433034', 'step': 5092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:37.462511', 'step': 5092, 'epoch': 3} {'type': 'loss', 'content': 0.0011396221816539764, 'timestamp': '2025-09-10 02:26:37.467061', 'step': 5093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:37.496773', 'step': 5093, 'epoch': 3} {'type': 'loss', 'content': 0.000759919814299792, 'timestamp': '2025-09-10 02:26:37.503790', 'step': 5094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:37.535802', 'step': 5094, 'epoch': 3} {'type': 'loss', 'content': 0.00020001002121716738, 'timestamp': '2025-09-10 02:26:37.542443', 'step': 5095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:37.572089', 'step': 5095, 'epoch': 3} {'type': 'loss', 'content': 0.00017582971486262977, 'timestamp': '2025-09-10 02:26:37.599644', 'step': 5096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:37.630671', 'step': 5096, 'epoch': 3} {'type': 'loss', 'content': 0.0004590843745972961, 'timestamp': '2025-09-10 02:26:37.635779', 'step': 5097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:37.676345', 'step': 5097, 'epoch': 3} {'type': 'loss', 'content': 0.0002961141581181437, 'timestamp': '2025-09-10 02:26:37.686453', 'step': 5098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:26:37.726004', 'step': 5098, 'epoch': 3} {'type': 'loss', 'content': 0.00022437986626755446, 'timestamp': '2025-09-10 02:26:37.739344', 'step': 5099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:37.773356', 'step': 5099, 'epoch': 3} {'type': 'loss', 'content': 0.0002960095298476517, 'timestamp': '2025-09-10 02:26:37.800799', 'step': 5100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:37.831592', 'step': 5100, 'epoch': 3} {'type': 'loss', 'content': 0.00015804110444150865, 'timestamp': '2025-09-10 02:26:37.836206', 'step': 5101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:26:37.871853', 'step': 5101, 'epoch': 3} {'type': 'loss', 'content': 0.0004349082300905138, 'timestamp': '2025-09-10 02:26:37.885536', 'step': 5102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:37.915557', 'step': 5102, 'epoch': 3} {'type': 'loss', 'content': 0.0009989996906369925, 'timestamp': '2025-09-10 02:26:37.922804', 'step': 5103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:37.956305', 'step': 5103, 'epoch': 3} {'type': 'loss', 'content': 0.0002965817984659225, 'timestamp': '2025-09-10 02:26:37.981392', 'step': 5104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:38.019315', 'step': 5104, 'epoch': 3} {'type': 'loss', 'content': 0.00011326335516059771, 'timestamp': '2025-09-10 02:26:38.027976', 'step': 5105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:38.059644', 'step': 5105, 'epoch': 3} {'type': 'loss', 'content': 0.0006430582143366337, 'timestamp': '2025-09-10 02:26:38.067309', 'step': 5106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:38.103324', 'step': 5106, 'epoch': 3} {'type': 'loss', 'content': 0.03719168156385422, 'timestamp': '2025-09-10 02:26:38.113064', 'step': 5107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:38.150802', 'step': 5107, 'epoch': 3} {'type': 'loss', 'content': 0.0005424571572802961, 'timestamp': '2025-09-10 02:26:38.181667', 'step': 5108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:38.214866', 'step': 5108, 'epoch': 3} {'type': 'loss', 'content': 0.00010797369759529829, 'timestamp': '2025-09-10 02:26:38.219882', 'step': 5109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:38.251030', 'step': 5109, 'epoch': 3} {'type': 'loss', 'content': 8.883118425728753e-05, 'timestamp': '2025-09-10 02:26:38.263566', 'step': 5110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:38.294698', 'step': 5110, 'epoch': 3} {'type': 'loss', 'content': 5.636332571157254e-05, 'timestamp': '2025-09-10 02:26:38.301741', 'step': 5111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:38.333874', 'step': 5111, 'epoch': 3} {'type': 'loss', 'content': 0.00013791404489893466, 'timestamp': '2025-09-10 02:26:38.362134', 'step': 5112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:38.402640', 'step': 5112, 'epoch': 3} {'type': 'loss', 'content': 0.0002762853109743446, 'timestamp': '2025-09-10 02:26:38.407777', 'step': 5113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:38.449345', 'step': 5113, 'epoch': 3} {'type': 'loss', 'content': 0.00036431459011510015, 'timestamp': '2025-09-10 02:26:38.453859', 'step': 5114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:38.484171', 'step': 5114, 'epoch': 3} {'type': 'loss', 'content': 0.002119356067851186, 'timestamp': '2025-09-10 02:26:38.490985', 'step': 5115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:38.537020', 'step': 5115, 'epoch': 3} {'type': 'loss', 'content': 0.0015236083418130875, 'timestamp': '2025-09-10 02:26:38.568091', 'step': 5116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:38.600258', 'step': 5116, 'epoch': 3} {'type': 'loss', 'content': 0.0002494109212420881, 'timestamp': '2025-09-10 02:26:38.610000', 'step': 5117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:38.642033', 'step': 5117, 'epoch': 3} {'type': 'loss', 'content': 0.00010412498522782698, 'timestamp': '2025-09-10 02:26:38.646738', 'step': 5118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:38.678539', 'step': 5118, 'epoch': 3} {'type': 'loss', 'content': 7.976993947522715e-05, 'timestamp': '2025-09-10 02:26:38.685400', 'step': 5119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:38.718264', 'step': 5119, 'epoch': 3} {'type': 'loss', 'content': 0.002568106632679701, 'timestamp': '2025-09-10 02:26:38.745928', 'step': 5120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:38.780243', 'step': 5120, 'epoch': 3} {'type': 'loss', 'content': 0.0005041944095864892, 'timestamp': '2025-09-10 02:26:38.785032', 'step': 5121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:38.817838', 'step': 5121, 'epoch': 3} {'type': 'loss', 'content': 0.00011331056157359853, 'timestamp': '2025-09-10 02:26:38.829518', 'step': 5122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:38.862267', 'step': 5122, 'epoch': 3} {'type': 'loss', 'content': 0.00043031698442064226, 'timestamp': '2025-09-10 02:26:38.874856', 'step': 5123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:38.908541', 'step': 5123, 'epoch': 3} {'type': 'loss', 'content': 0.002998805372044444, 'timestamp': '2025-09-10 02:26:38.936629', 'step': 5124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:38.968508', 'step': 5124, 'epoch': 3} {'type': 'loss', 'content': 0.01266338862478733, 'timestamp': '2025-09-10 02:26:38.973088', 'step': 5125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:39.003978', 'step': 5125, 'epoch': 3} {'type': 'loss', 'content': 0.0002824350376613438, 'timestamp': '2025-09-10 02:26:39.010652', 'step': 5126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:39.042165', 'step': 5126, 'epoch': 3} {'type': 'loss', 'content': 0.00010247425962006673, 'timestamp': '2025-09-10 02:26:39.046697', 'step': 5127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:39.081094', 'step': 5127, 'epoch': 3} {'type': 'loss', 'content': 0.0008860399248078465, 'timestamp': '2025-09-10 02:26:39.108780', 'step': 5128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:39.143111', 'step': 5128, 'epoch': 3} {'type': 'loss', 'content': 8.454523049294949e-05, 'timestamp': '2025-09-10 02:26:39.147281', 'step': 5129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:39.187542', 'step': 5129, 'epoch': 3} {'type': 'loss', 'content': 0.0003048314538318664, 'timestamp': '2025-09-10 02:26:39.197351', 'step': 5130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:39.230939', 'step': 5130, 'epoch': 3} {'type': 'loss', 'content': 0.00018424120207782835, 'timestamp': '2025-09-10 02:26:39.240890', 'step': 5131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:39.272716', 'step': 5131, 'epoch': 3} {'type': 'loss', 'content': 0.0006012596422806382, 'timestamp': '2025-09-10 02:26:39.300808', 'step': 5132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:39.332691', 'step': 5132, 'epoch': 3} {'type': 'loss', 'content': 0.0003009784559253603, 'timestamp': '2025-09-10 02:26:39.339735', 'step': 5133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:39.375609', 'step': 5133, 'epoch': 3} {'type': 'loss', 'content': 0.03187219798564911, 'timestamp': '2025-09-10 02:26:39.385234', 'step': 5134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:39.423679', 'step': 5134, 'epoch': 3} {'type': 'loss', 'content': 0.0010775693226605654, 'timestamp': '2025-09-10 02:26:39.434241', 'step': 5135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:39.464830', 'step': 5135, 'epoch': 3} {'type': 'loss', 'content': 0.001194292795844376, 'timestamp': '2025-09-10 02:26:39.496349', 'step': 5136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:39.527484', 'step': 5136, 'epoch': 3} {'type': 'loss', 'content': 0.0019652375485748053, 'timestamp': '2025-09-10 02:26:39.535242', 'step': 5137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:39.568984', 'step': 5137, 'epoch': 3} {'type': 'loss', 'content': 0.010311486199498177, 'timestamp': '2025-09-10 02:26:39.576682', 'step': 5138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:39.606200', 'step': 5138, 'epoch': 3} {'type': 'loss', 'content': 0.00019729572522919625, 'timestamp': '2025-09-10 02:26:39.613264', 'step': 5139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:39.643471', 'step': 5139, 'epoch': 3} {'type': 'loss', 'content': 0.00033027934841811657, 'timestamp': '2025-09-10 02:26:39.671867', 'step': 5140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:39.703487', 'step': 5140, 'epoch': 3} {'type': 'loss', 'content': 0.0022352919913828373, 'timestamp': '2025-09-10 02:26:39.705615', 'step': 5141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:39.737702', 'step': 5141, 'epoch': 3} {'type': 'loss', 'content': 0.002084512962028384, 'timestamp': '2025-09-10 02:26:39.746825', 'step': 5142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:39.778404', 'step': 5142, 'epoch': 3} {'type': 'loss', 'content': 0.00020568576292134821, 'timestamp': '2025-09-10 02:26:39.789245', 'step': 5143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:39.818748', 'step': 5143, 'epoch': 3} {'type': 'loss', 'content': 0.00034350433270446956, 'timestamp': '2025-09-10 02:26:39.849832', 'step': 5144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:39.882711', 'step': 5144, 'epoch': 3} {'type': 'loss', 'content': 9.809032781049609e-05, 'timestamp': '2025-09-10 02:26:39.892207', 'step': 5145, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:26:50.094432', 'step': 5145, 'epoch': 3} {'type': 'pplx', 'content': 19013441.374623075, 'timestamp': '2025-09-10 02:26:50.098424', 'step': 5145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:50.128744', 'step': 5145, 'epoch': 3} {'type': 'loss', 'content': 0.0014527181629091501, 'timestamp': '2025-09-10 02:26:50.130787', 'step': 5146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:50.162665', 'step': 5146, 'epoch': 3} {'type': 'loss', 'content': 0.027358056977391243, 'timestamp': '2025-09-10 02:26:50.168867', 'step': 5147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:50.202488', 'step': 5147, 'epoch': 3} {'type': 'loss', 'content': 0.0004902129294350743, 'timestamp': '2025-09-10 02:26:50.229570', 'step': 5148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:50.265151', 'step': 5148, 'epoch': 3} {'type': 'loss', 'content': 0.00014729471877217293, 'timestamp': '2025-09-10 02:26:50.269830', 'step': 5149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:50.302158', 'step': 5149, 'epoch': 3} {'type': 'loss', 'content': 0.003401133930310607, 'timestamp': '2025-09-10 02:26:50.309049', 'step': 5150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:50.340073', 'step': 5150, 'epoch': 3} {'type': 'loss', 'content': 0.00013149350706953555, 'timestamp': '2025-09-10 02:26:50.351481', 'step': 5151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:26:50.385466', 'step': 5151, 'epoch': 3} {'type': 'loss', 'content': 0.01326068490743637, 'timestamp': '2025-09-10 02:26:50.419683', 'step': 5152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:50.451962', 'step': 5152, 'epoch': 3} {'type': 'loss', 'content': 0.00036567659117281437, 'timestamp': '2025-09-10 02:26:50.456231', 'step': 5153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:50.488390', 'step': 5153, 'epoch': 3} {'type': 'loss', 'content': 0.013120094314217567, 'timestamp': '2025-09-10 02:26:50.495873', 'step': 5154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:50.529639', 'step': 5154, 'epoch': 3} {'type': 'loss', 'content': 0.00017451155872549862, 'timestamp': '2025-09-10 02:26:50.536633', 'step': 5155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:50.567172', 'step': 5155, 'epoch': 3} {'type': 'loss', 'content': 0.00037706297007389367, 'timestamp': '2025-09-10 02:26:50.592290', 'step': 5156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:50.624924', 'step': 5156, 'epoch': 3} {'type': 'loss', 'content': 0.0002998369454871863, 'timestamp': '2025-09-10 02:26:50.628990', 'step': 5157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:50.661318', 'step': 5157, 'epoch': 3} {'type': 'loss', 'content': 0.0006355083896778524, 'timestamp': '2025-09-10 02:26:50.665583', 'step': 5158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:26:50.700149', 'step': 5158, 'epoch': 3} {'type': 'loss', 'content': 0.0024739918299019337, 'timestamp': '2025-09-10 02:26:50.714091', 'step': 5159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:50.747276', 'step': 5159, 'epoch': 3} {'type': 'loss', 'content': 0.0006842431612312794, 'timestamp': '2025-09-10 02:26:50.772316', 'step': 5160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:50.804366', 'step': 5160, 'epoch': 3} {'type': 'loss', 'content': 0.0001715581602184102, 'timestamp': '2025-09-10 02:26:50.806614', 'step': 5161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:50.838444', 'step': 5161, 'epoch': 3} {'type': 'loss', 'content': 0.0001334312546532601, 'timestamp': '2025-09-10 02:26:50.850899', 'step': 5162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:50.882059', 'step': 5162, 'epoch': 3} {'type': 'loss', 'content': 0.0015925957122817636, 'timestamp': '2025-09-10 02:26:50.892413', 'step': 5163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:50.924795', 'step': 5163, 'epoch': 3} {'type': 'loss', 'content': 0.035536449402570724, 'timestamp': '2025-09-10 02:26:50.952247', 'step': 5164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:50.984434', 'step': 5164, 'epoch': 3} {'type': 'loss', 'content': 0.032359056174755096, 'timestamp': '2025-09-10 02:26:50.988981', 'step': 5165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:51.019851', 'step': 5165, 'epoch': 3} {'type': 'loss', 'content': 0.0012751846807077527, 'timestamp': '2025-09-10 02:26:51.024166', 'step': 5166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:51.057009', 'step': 5166, 'epoch': 3} {'type': 'loss', 'content': 0.0008924083085730672, 'timestamp': '2025-09-10 02:26:51.061492', 'step': 5167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:51.094260', 'step': 5167, 'epoch': 3} {'type': 'loss', 'content': 0.0009923691395670176, 'timestamp': '2025-09-10 02:26:51.127680', 'step': 5168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:51.159718', 'step': 5168, 'epoch': 3} {'type': 'loss', 'content': 0.004293483681976795, 'timestamp': '2025-09-10 02:26:51.167509', 'step': 5169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:51.198390', 'step': 5169, 'epoch': 3} {'type': 'loss', 'content': 0.0001559741358505562, 'timestamp': '2025-09-10 02:26:51.202516', 'step': 5170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:51.234801', 'step': 5170, 'epoch': 3} {'type': 'loss', 'content': 0.0003882385208271444, 'timestamp': '2025-09-10 02:26:51.242426', 'step': 5171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:51.274436', 'step': 5171, 'epoch': 3} {'type': 'loss', 'content': 0.0005971429636701941, 'timestamp': '2025-09-10 02:26:51.299776', 'step': 5172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:51.330997', 'step': 5172, 'epoch': 3} {'type': 'loss', 'content': 0.00021882994042243809, 'timestamp': '2025-09-10 02:26:51.336487', 'step': 5173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:51.367881', 'step': 5173, 'epoch': 3} {'type': 'loss', 'content': 0.0011246565263718367, 'timestamp': '2025-09-10 02:26:51.375480', 'step': 5174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:51.405682', 'step': 5174, 'epoch': 3} {'type': 'loss', 'content': 0.00022782094310969114, 'timestamp': '2025-09-10 02:26:51.410086', 'step': 5175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:51.442061', 'step': 5175, 'epoch': 3} {'type': 'loss', 'content': 0.00039846167783252895, 'timestamp': '2025-09-10 02:26:51.467694', 'step': 5176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:26:51.501857', 'step': 5176, 'epoch': 3} {'type': 'loss', 'content': 0.00024708619457669556, 'timestamp': '2025-09-10 02:26:51.514555', 'step': 5177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:51.546565', 'step': 5177, 'epoch': 3} {'type': 'loss', 'content': 0.05931756645441055, 'timestamp': '2025-09-10 02:26:51.558679', 'step': 5178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:51.590202', 'step': 5178, 'epoch': 3} {'type': 'loss', 'content': 0.0005889888852834702, 'timestamp': '2025-09-10 02:26:51.594559', 'step': 5179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:51.625517', 'step': 5179, 'epoch': 3} {'type': 'loss', 'content': 0.00023044981935527176, 'timestamp': '2025-09-10 02:26:51.653377', 'step': 5180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:51.684595', 'step': 5180, 'epoch': 3} {'type': 'loss', 'content': 0.0009921352611854672, 'timestamp': '2025-09-10 02:26:51.689662', 'step': 5181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:51.720180', 'step': 5181, 'epoch': 3} {'type': 'loss', 'content': 0.0005538056720979512, 'timestamp': '2025-09-10 02:26:51.731255', 'step': 5182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:51.762227', 'step': 5182, 'epoch': 3} {'type': 'loss', 'content': 0.0023367933463305235, 'timestamp': '2025-09-10 02:26:51.772542', 'step': 5183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:51.804192', 'step': 5183, 'epoch': 3} {'type': 'loss', 'content': 0.0002975693787448108, 'timestamp': '2025-09-10 02:26:51.832091', 'step': 5184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:51.864290', 'step': 5184, 'epoch': 3} {'type': 'loss', 'content': 0.0006394670926965773, 'timestamp': '2025-09-10 02:26:51.871192', 'step': 5185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:51.902617', 'step': 5185, 'epoch': 3} {'type': 'loss', 'content': 0.0005975649692118168, 'timestamp': '2025-09-10 02:26:51.904913', 'step': 5186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:51.937164', 'step': 5186, 'epoch': 3} {'type': 'loss', 'content': 0.0004425121296662837, 'timestamp': '2025-09-10 02:26:51.943970', 'step': 5187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:51.975672', 'step': 5187, 'epoch': 3} {'type': 'loss', 'content': 0.00035937223583459854, 'timestamp': '2025-09-10 02:26:52.003857', 'step': 5188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:52.035467', 'step': 5188, 'epoch': 3} {'type': 'loss', 'content': 0.0011359489290043712, 'timestamp': '2025-09-10 02:26:52.037712', 'step': 5189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:52.068998', 'step': 5189, 'epoch': 3} {'type': 'loss', 'content': 0.003917438443750143, 'timestamp': '2025-09-10 02:26:52.075712', 'step': 5190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:52.107588', 'step': 5190, 'epoch': 3} {'type': 'loss', 'content': 0.0001755694829626009, 'timestamp': '2025-09-10 02:26:52.114296', 'step': 5191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:52.146621', 'step': 5191, 'epoch': 3} {'type': 'loss', 'content': 0.0007622348493896425, 'timestamp': '2025-09-10 02:26:52.174521', 'step': 5192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:52.206264', 'step': 5192, 'epoch': 3} {'type': 'loss', 'content': 0.00104383728466928, 'timestamp': '2025-09-10 02:26:52.213874', 'step': 5193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:52.246717', 'step': 5193, 'epoch': 3} {'type': 'loss', 'content': 0.00010719360579969361, 'timestamp': '2025-09-10 02:26:52.250995', 'step': 5194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:52.282112', 'step': 5194, 'epoch': 3} {'type': 'loss', 'content': 0.00022146674746181816, 'timestamp': '2025-09-10 02:26:52.289447', 'step': 5195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:52.321360', 'step': 5195, 'epoch': 3} {'type': 'loss', 'content': 0.000337046105414629, 'timestamp': '2025-09-10 02:26:52.349962', 'step': 5196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:52.381024', 'step': 5196, 'epoch': 3} {'type': 'loss', 'content': 0.0005232029943726957, 'timestamp': '2025-09-10 02:26:52.385715', 'step': 5197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:52.416162', 'step': 5197, 'epoch': 3} {'type': 'loss', 'content': 0.00024187321832869202, 'timestamp': '2025-09-10 02:26:52.426355', 'step': 5198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:52.457633', 'step': 5198, 'epoch': 3} {'type': 'loss', 'content': 0.026408951729536057, 'timestamp': '2025-09-10 02:26:52.462089', 'step': 5199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:26:52.500650', 'step': 5199, 'epoch': 3} {'type': 'loss', 'content': 0.00909572746604681, 'timestamp': '2025-09-10 02:26:52.537239', 'step': 5200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:52.568974', 'step': 5200, 'epoch': 3} {'type': 'loss', 'content': 0.00032603865838609636, 'timestamp': '2025-09-10 02:26:52.573368', 'step': 5201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:52.603891', 'step': 5201, 'epoch': 3} {'type': 'loss', 'content': 0.005753154866397381, 'timestamp': '2025-09-10 02:26:52.610681', 'step': 5202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:52.641795', 'step': 5202, 'epoch': 3} {'type': 'loss', 'content': 0.0010426250519230962, 'timestamp': '2025-09-10 02:26:52.649517', 'step': 5203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:26:52.689718', 'step': 5203, 'epoch': 3} {'type': 'loss', 'content': 0.0064912172965705395, 'timestamp': '2025-09-10 02:26:52.726772', 'step': 5204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:52.759662', 'step': 5204, 'epoch': 3} {'type': 'loss', 'content': 0.00025196291971951723, 'timestamp': '2025-09-10 02:26:52.761919', 'step': 5205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:26:52.796838', 'step': 5205, 'epoch': 3} {'type': 'loss', 'content': 0.0011967868776991963, 'timestamp': '2025-09-10 02:26:52.810487', 'step': 5206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:52.842346', 'step': 5206, 'epoch': 3} {'type': 'loss', 'content': 0.0008071462507359684, 'timestamp': '2025-09-10 02:26:52.849531', 'step': 5207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:52.881484', 'step': 5207, 'epoch': 3} {'type': 'loss', 'content': 0.00036838999949395657, 'timestamp': '2025-09-10 02:26:52.908146', 'step': 5208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:52.947984', 'step': 5208, 'epoch': 3} {'type': 'loss', 'content': 0.0002956208190880716, 'timestamp': '2025-09-10 02:26:52.952213', 'step': 5209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:52.984034', 'step': 5209, 'epoch': 3} {'type': 'loss', 'content': 0.003162443172186613, 'timestamp': '2025-09-10 02:26:52.986347', 'step': 5210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:53.016915', 'step': 5210, 'epoch': 3} {'type': 'loss', 'content': 0.0004851007543038577, 'timestamp': '2025-09-10 02:26:53.019203', 'step': 5211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:26:53.057262', 'step': 5211, 'epoch': 3} {'type': 'loss', 'content': 0.0001697681873338297, 'timestamp': '2025-09-10 02:26:53.094094', 'step': 5212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:53.124931', 'step': 5212, 'epoch': 3} {'type': 'loss', 'content': 0.0003544171922840178, 'timestamp': '2025-09-10 02:26:53.126906', 'step': 5213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:53.158968', 'step': 5213, 'epoch': 3} {'type': 'loss', 'content': 0.007984244264662266, 'timestamp': '2025-09-10 02:26:53.163284', 'step': 5214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:53.194990', 'step': 5214, 'epoch': 3} {'type': 'loss', 'content': 0.0009763463167473674, 'timestamp': '2025-09-10 02:26:53.202091', 'step': 5215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:53.233627', 'step': 5215, 'epoch': 3} {'type': 'loss', 'content': 8.645112393423915e-05, 'timestamp': '2025-09-10 02:26:53.264071', 'step': 5216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:53.296248', 'step': 5216, 'epoch': 3} {'type': 'loss', 'content': 0.00014072064368519932, 'timestamp': '2025-09-10 02:26:53.300402', 'step': 5217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:53.332383', 'step': 5217, 'epoch': 3} {'type': 'loss', 'content': 0.012747065164148808, 'timestamp': '2025-09-10 02:26:53.341941', 'step': 5218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:26:53.375345', 'step': 5218, 'epoch': 3} {'type': 'loss', 'content': 0.00011503745190566406, 'timestamp': '2025-09-10 02:26:53.389105', 'step': 5219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:53.422086', 'step': 5219, 'epoch': 3} {'type': 'loss', 'content': 0.00017217174172401428, 'timestamp': '2025-09-10 02:26:53.450612', 'step': 5220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:53.482505', 'step': 5220, 'epoch': 3} {'type': 'loss', 'content': 0.0002020140818785876, 'timestamp': '2025-09-10 02:26:53.487460', 'step': 5221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:53.520956', 'step': 5221, 'epoch': 3} {'type': 'loss', 'content': 0.000206151555175893, 'timestamp': '2025-09-10 02:26:53.528561', 'step': 5222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:53.560359', 'step': 5222, 'epoch': 3} {'type': 'loss', 'content': 0.0019689316395670176, 'timestamp': '2025-09-10 02:26:53.571892', 'step': 5223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:53.602541', 'step': 5223, 'epoch': 3} {'type': 'loss', 'content': 0.0025810713414102793, 'timestamp': '2025-09-10 02:26:53.635699', 'step': 5224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:26:53.671170', 'step': 5224, 'epoch': 3} {'type': 'loss', 'content': 0.005631653126329184, 'timestamp': '2025-09-10 02:26:53.684263', 'step': 5225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:53.716685', 'step': 5225, 'epoch': 3} {'type': 'loss', 'content': 0.0002063760912278667, 'timestamp': '2025-09-10 02:26:53.725994', 'step': 5226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:53.757179', 'step': 5226, 'epoch': 3} {'type': 'loss', 'content': 0.010216274298727512, 'timestamp': '2025-09-10 02:26:53.764700', 'step': 5227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:26:53.802766', 'step': 5227, 'epoch': 3} {'type': 'loss', 'content': 0.001996259670704603, 'timestamp': '2025-09-10 02:26:53.839561', 'step': 5228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:53.869898', 'step': 5228, 'epoch': 3} {'type': 'loss', 'content': 0.0011374764144420624, 'timestamp': '2025-09-10 02:26:53.874083', 'step': 5229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:26:53.908877', 'step': 5229, 'epoch': 3} {'type': 'loss', 'content': 0.0006331949844025075, 'timestamp': '2025-09-10 02:26:53.922647', 'step': 5230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:26:53.955933', 'step': 5230, 'epoch': 3} {'type': 'loss', 'content': 0.0007114322506822646, 'timestamp': '2025-09-10 02:26:53.969325', 'step': 5231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:54.001297', 'step': 5231, 'epoch': 3} {'type': 'loss', 'content': 0.004758648574352264, 'timestamp': '2025-09-10 02:26:54.026232', 'step': 5232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:54.058661', 'step': 5232, 'epoch': 3} {'type': 'loss', 'content': 0.00021887525508645922, 'timestamp': '2025-09-10 02:26:54.066298', 'step': 5233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:54.097606', 'step': 5233, 'epoch': 3} {'type': 'loss', 'content': 0.0062928879633545876, 'timestamp': '2025-09-10 02:26:54.101720', 'step': 5234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:54.132929', 'step': 5234, 'epoch': 3} {'type': 'loss', 'content': 0.0007375451386906207, 'timestamp': '2025-09-10 02:26:54.140376', 'step': 5235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:54.173713', 'step': 5235, 'epoch': 3} {'type': 'loss', 'content': 0.00021762121468782425, 'timestamp': '2025-09-10 02:26:54.198807', 'step': 5236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:54.231943', 'step': 5236, 'epoch': 3} {'type': 'loss', 'content': 0.0011317295720800757, 'timestamp': '2025-09-10 02:26:54.241667', 'step': 5237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:54.275801', 'step': 5237, 'epoch': 3} {'type': 'loss', 'content': 0.00016049954865593463, 'timestamp': '2025-09-10 02:26:54.283440', 'step': 5238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:54.313598', 'step': 5238, 'epoch': 3} {'type': 'loss', 'content': 0.01704251952469349, 'timestamp': '2025-09-10 02:26:54.316005', 'step': 5239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:54.348099', 'step': 5239, 'epoch': 3} {'type': 'loss', 'content': 0.001351992366835475, 'timestamp': '2025-09-10 02:26:54.376305', 'step': 5240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:54.407460', 'step': 5240, 'epoch': 3} {'type': 'loss', 'content': 0.0004043028748128563, 'timestamp': '2025-09-10 02:26:54.411662', 'step': 5241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:54.442179', 'step': 5241, 'epoch': 3} {'type': 'loss', 'content': 0.005190826021134853, 'timestamp': '2025-09-10 02:26:54.451994', 'step': 5242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:54.483998', 'step': 5242, 'epoch': 3} {'type': 'loss', 'content': 0.0003823291917797178, 'timestamp': '2025-09-10 02:26:54.491018', 'step': 5243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:54.521728', 'step': 5243, 'epoch': 3} {'type': 'loss', 'content': 0.0018407927127555013, 'timestamp': '2025-09-10 02:26:54.550143', 'step': 5244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:54.583304', 'step': 5244, 'epoch': 3} {'type': 'loss', 'content': 5.925807636231184e-05, 'timestamp': '2025-09-10 02:26:54.592363', 'step': 5245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:54.623476', 'step': 5245, 'epoch': 3} {'type': 'loss', 'content': 0.002199393231421709, 'timestamp': '2025-09-10 02:26:54.630980', 'step': 5246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:26:54.672924', 'step': 5246, 'epoch': 3} {'type': 'loss', 'content': 0.00037568985135294497, 'timestamp': '2025-09-10 02:26:54.690237', 'step': 5247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:54.721466', 'step': 5247, 'epoch': 3} {'type': 'loss', 'content': 0.0037835666444152594, 'timestamp': '2025-09-10 02:26:54.749018', 'step': 5248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:54.782708', 'step': 5248, 'epoch': 3} {'type': 'loss', 'content': 0.00022347843332681805, 'timestamp': '2025-09-10 02:26:54.785142', 'step': 5249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 688], 'flops': 20408222954560}, 'timestamp': '2025-09-10 02:26:54.841596', 'step': 5249, 'epoch': 3} {'type': 'loss', 'content': 7.557055505458266e-05, 'timestamp': '2025-09-10 02:26:54.865716', 'step': 5250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:54.898591', 'step': 5250, 'epoch': 3} {'type': 'loss', 'content': 0.002831138903275132, 'timestamp': '2025-09-10 02:26:54.900995', 'step': 5251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:54.932463', 'step': 5251, 'epoch': 3} {'type': 'loss', 'content': 0.0007521641673520207, 'timestamp': '2025-09-10 02:26:54.960783', 'step': 5252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:54.991912', 'step': 5252, 'epoch': 3} {'type': 'loss', 'content': 9.602372301742435e-05, 'timestamp': '2025-09-10 02:26:54.996710', 'step': 5253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:55.027852', 'step': 5253, 'epoch': 3} {'type': 'loss', 'content': 0.014525480568408966, 'timestamp': '2025-09-10 02:26:55.030309', 'step': 5254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:55.061017', 'step': 5254, 'epoch': 3} {'type': 'loss', 'content': 8.630308730062097e-05, 'timestamp': '2025-09-10 02:26:55.072516', 'step': 5255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:26:55.104329', 'step': 5255, 'epoch': 3} {'type': 'loss', 'content': 0.00013203138951212168, 'timestamp': '2025-09-10 02:26:55.132507', 'step': 5256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:55.164602', 'step': 5256, 'epoch': 3} {'type': 'loss', 'content': 0.0021953012328594923, 'timestamp': '2025-09-10 02:26:55.169052', 'step': 5257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:26:55.206701', 'step': 5257, 'epoch': 3} {'type': 'loss', 'content': 0.017087912186980247, 'timestamp': '2025-09-10 02:26:55.222355', 'step': 5258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:55.253490', 'step': 5258, 'epoch': 3} {'type': 'loss', 'content': 0.00040845529292710125, 'timestamp': '2025-09-10 02:26:55.260749', 'step': 5259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 560], 'flops': 16611393146432}, 'timestamp': '2025-09-10 02:26:55.308329', 'step': 5259, 'epoch': 3} {'type': 'loss', 'content': 9.842081635724753e-05, 'timestamp': '2025-09-10 02:26:55.348588', 'step': 5260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:55.379379', 'step': 5260, 'epoch': 3} {'type': 'loss', 'content': 0.0008445015409961343, 'timestamp': '2025-09-10 02:26:55.383680', 'step': 5261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:55.415441', 'step': 5261, 'epoch': 3} {'type': 'loss', 'content': 0.00015750537568237633, 'timestamp': '2025-09-10 02:26:55.424919', 'step': 5262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:55.456799', 'step': 5262, 'epoch': 3} {'type': 'loss', 'content': 0.0009556380682624876, 'timestamp': '2025-09-10 02:26:55.463257', 'step': 5263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:55.500776', 'step': 5263, 'epoch': 3} {'type': 'loss', 'content': 0.00016424224304500967, 'timestamp': '2025-09-10 02:26:55.525896', 'step': 5264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:55.557462', 'step': 5264, 'epoch': 3} {'type': 'loss', 'content': 0.00011993583757430315, 'timestamp': '2025-09-10 02:26:55.567142', 'step': 5265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:55.598056', 'step': 5265, 'epoch': 3} {'type': 'loss', 'content': 0.00017937307711690664, 'timestamp': '2025-09-10 02:26:55.609593', 'step': 5266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:55.641151', 'step': 5266, 'epoch': 3} {'type': 'loss', 'content': 0.0018583576893433928, 'timestamp': '2025-09-10 02:26:55.647912', 'step': 5267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:55.680586', 'step': 5267, 'epoch': 3} {'type': 'loss', 'content': 0.002548788907006383, 'timestamp': '2025-09-10 02:26:55.713197', 'step': 5268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:55.744559', 'step': 5268, 'epoch': 3} {'type': 'loss', 'content': 0.00015272719610948116, 'timestamp': '2025-09-10 02:26:55.748061', 'step': 5269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:55.780263', 'step': 5269, 'epoch': 3} {'type': 'loss', 'content': 0.00022310127678792924, 'timestamp': '2025-09-10 02:26:55.784449', 'step': 5270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:55.814738', 'step': 5270, 'epoch': 3} {'type': 'loss', 'content': 0.000734326837118715, 'timestamp': '2025-09-10 02:26:55.818917', 'step': 5271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:26:55.854007', 'step': 5271, 'epoch': 3} {'type': 'loss', 'content': 0.0004884271766059101, 'timestamp': '2025-09-10 02:26:55.888955', 'step': 5272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:55.920093', 'step': 5272, 'epoch': 3} {'type': 'loss', 'content': 0.0021002234425395727, 'timestamp': '2025-09-10 02:26:55.922221', 'step': 5273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:55.952507', 'step': 5273, 'epoch': 3} {'type': 'loss', 'content': 4.44793731730897e-05, 'timestamp': '2025-09-10 02:26:55.956751', 'step': 5274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:55.987419', 'step': 5274, 'epoch': 3} {'type': 'loss', 'content': 9.469302312936634e-05, 'timestamp': '2025-09-10 02:26:55.994191', 'step': 5275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:26:56.026153', 'step': 5275, 'epoch': 3} {'type': 'loss', 'content': 0.0009514411212876439, 'timestamp': '2025-09-10 02:26:56.058722', 'step': 5276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:26:56.090585', 'step': 5276, 'epoch': 3} {'type': 'loss', 'content': 6.029165888321586e-05, 'timestamp': '2025-09-10 02:26:56.103707', 'step': 5277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:56.134258', 'step': 5277, 'epoch': 3} {'type': 'loss', 'content': 0.03702101483941078, 'timestamp': '2025-09-10 02:26:56.138049', 'step': 5278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:56.169637', 'step': 5278, 'epoch': 3} {'type': 'loss', 'content': 7.557779463240877e-05, 'timestamp': '2025-09-10 02:26:56.172049', 'step': 5279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:56.204160', 'step': 5279, 'epoch': 3} {'type': 'loss', 'content': 0.0008645829511806369, 'timestamp': '2025-09-10 02:26:56.235523', 'step': 5280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:56.267832', 'step': 5280, 'epoch': 3} {'type': 'loss', 'content': 0.00013446787488646805, 'timestamp': '2025-09-10 02:26:56.271949', 'step': 5281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:56.304519', 'step': 5281, 'epoch': 3} {'type': 'loss', 'content': 0.003033523913472891, 'timestamp': '2025-09-10 02:26:56.306908', 'step': 5282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:56.337846', 'step': 5282, 'epoch': 3} {'type': 'loss', 'content': 0.0018720559310168028, 'timestamp': '2025-09-10 02:26:56.341602', 'step': 5283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:26:56.373785', 'step': 5283, 'epoch': 3} {'type': 'loss', 'content': 0.0002793918247334659, 'timestamp': '2025-09-10 02:26:56.404155', 'step': 5284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:56.438641', 'step': 5284, 'epoch': 3} {'type': 'loss', 'content': 0.05163096636533737, 'timestamp': '2025-09-10 02:26:56.441011', 'step': 5285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:26:56.472033', 'step': 5285, 'epoch': 3} {'type': 'loss', 'content': 0.009458529762923717, 'timestamp': '2025-09-10 02:26:56.479305', 'step': 5286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:26:56.520156', 'step': 5286, 'epoch': 3} {'type': 'loss', 'content': 0.0008849184960126877, 'timestamp': '2025-09-10 02:26:56.537189', 'step': 5287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:26:56.569025', 'step': 5287, 'epoch': 3} {'type': 'loss', 'content': 0.0013038819888606668, 'timestamp': '2025-09-10 02:26:56.602016', 'step': 5288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:56.633736', 'step': 5288, 'epoch': 3} {'type': 'loss', 'content': 0.04861394315958023, 'timestamp': '2025-09-10 02:26:56.638075', 'step': 5289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:56.668284', 'step': 5289, 'epoch': 3} {'type': 'loss', 'content': 0.004373115487396717, 'timestamp': '2025-09-10 02:26:56.674804', 'step': 5290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:26:56.707345', 'step': 5290, 'epoch': 3} {'type': 'loss', 'content': 0.00035963160917162895, 'timestamp': '2025-09-10 02:26:56.717659', 'step': 5291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:26:56.750783', 'step': 5291, 'epoch': 3} {'type': 'loss', 'content': 0.00020156674145255238, 'timestamp': '2025-09-10 02:26:56.785068', 'step': 5292, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:27:06.874113', 'step': 5292, 'epoch': 3} {'type': 'pplx', 'content': 19403711.464340024, 'timestamp': '2025-09-10 02:27:06.877312', 'step': 5292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:06.907901', 'step': 5292, 'epoch': 3} {'type': 'loss', 'content': 0.00023475231137126684, 'timestamp': '2025-09-10 02:27:06.909938', 'step': 5293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:06.942794', 'step': 5293, 'epoch': 3} {'type': 'loss', 'content': 0.0001284535537706688, 'timestamp': '2025-09-10 02:27:06.952035', 'step': 5294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:06.983428', 'step': 5294, 'epoch': 3} {'type': 'loss', 'content': 0.003995387349277735, 'timestamp': '2025-09-10 02:27:06.991032', 'step': 5295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:07.022414', 'step': 5295, 'epoch': 3} {'type': 'loss', 'content': 0.00036020742845721543, 'timestamp': '2025-09-10 02:27:07.047476', 'step': 5296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:07.079413', 'step': 5296, 'epoch': 3} {'type': 'loss', 'content': 0.0002324790257262066, 'timestamp': '2025-09-10 02:27:07.081576', 'step': 5297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:07.113372', 'step': 5297, 'epoch': 3} {'type': 'loss', 'content': 0.0002664544736035168, 'timestamp': '2025-09-10 02:27:07.125753', 'step': 5298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:07.158414', 'step': 5298, 'epoch': 3} {'type': 'loss', 'content': 0.004768196027725935, 'timestamp': '2025-09-10 02:27:07.165119', 'step': 5299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:07.197411', 'step': 5299, 'epoch': 3} {'type': 'loss', 'content': 0.0005364773096516728, 'timestamp': '2025-09-10 02:27:07.228444', 'step': 5300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:07.260805', 'step': 5300, 'epoch': 3} {'type': 'loss', 'content': 0.005759544670581818, 'timestamp': '2025-09-10 02:27:07.264918', 'step': 5301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:07.296947', 'step': 5301, 'epoch': 3} {'type': 'loss', 'content': 0.0015448706690222025, 'timestamp': '2025-09-10 02:27:07.304275', 'step': 5302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 816], 'flops': 24205052762688}, 'timestamp': '2025-09-10 02:27:07.372258', 'step': 5302, 'epoch': 3} {'type': 'loss', 'content': 9.88330357358791e-05, 'timestamp': '2025-09-10 02:27:07.400778', 'step': 5303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:07.433304', 'step': 5303, 'epoch': 3} {'type': 'loss', 'content': 0.0007378348964266479, 'timestamp': '2025-09-10 02:27:07.461302', 'step': 5304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:07.494345', 'step': 5304, 'epoch': 3} {'type': 'loss', 'content': 0.00042900207336060703, 'timestamp': '2025-09-10 02:27:07.500713', 'step': 5305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:07.533228', 'step': 5305, 'epoch': 3} {'type': 'loss', 'content': 0.0002336119068786502, 'timestamp': '2025-09-10 02:27:07.544426', 'step': 5306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:07.576992', 'step': 5306, 'epoch': 3} {'type': 'loss', 'content': 7.151837053243071e-05, 'timestamp': '2025-09-10 02:27:07.580801', 'step': 5307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:07.612631', 'step': 5307, 'epoch': 3} {'type': 'loss', 'content': 0.0003323144337628037, 'timestamp': '2025-09-10 02:27:07.640830', 'step': 5308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:07.671961', 'step': 5308, 'epoch': 3} {'type': 'loss', 'content': 0.0012035273248329759, 'timestamp': '2025-09-10 02:27:07.674138', 'step': 5309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:27:07.709551', 'step': 5309, 'epoch': 3} {'type': 'loss', 'content': 0.0002544188464526087, 'timestamp': '2025-09-10 02:27:07.723594', 'step': 5310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:07.755728', 'step': 5310, 'epoch': 3} {'type': 'loss', 'content': 0.0003566377272363752, 'timestamp': '2025-09-10 02:27:07.762625', 'step': 5311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:07.793161', 'step': 5311, 'epoch': 3} {'type': 'loss', 'content': 0.0005627021309919655, 'timestamp': '2025-09-10 02:27:07.821649', 'step': 5312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:07.853329', 'step': 5312, 'epoch': 3} {'type': 'loss', 'content': 0.0019646212458610535, 'timestamp': '2025-09-10 02:27:07.858432', 'step': 5313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:07.889314', 'step': 5313, 'epoch': 3} {'type': 'loss', 'content': 0.013626412488520145, 'timestamp': '2025-09-10 02:27:07.896849', 'step': 5314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:07.927894', 'step': 5314, 'epoch': 3} {'type': 'loss', 'content': 0.0002811032463796437, 'timestamp': '2025-09-10 02:27:07.934716', 'step': 5315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:07.968017', 'step': 5315, 'epoch': 3} {'type': 'loss', 'content': 0.00022139211068861187, 'timestamp': '2025-09-10 02:27:07.995554', 'step': 5316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:08.029335', 'step': 5316, 'epoch': 3} {'type': 'loss', 'content': 0.00019662485283333808, 'timestamp': '2025-09-10 02:27:08.038048', 'step': 5317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:08.069272', 'step': 5317, 'epoch': 3} {'type': 'loss', 'content': 0.0002974488597828895, 'timestamp': '2025-09-10 02:27:08.081644', 'step': 5318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:27:08.116905', 'step': 5318, 'epoch': 3} {'type': 'loss', 'content': 0.0016499229241162539, 'timestamp': '2025-09-10 02:27:08.130677', 'step': 5319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:08.162174', 'step': 5319, 'epoch': 3} {'type': 'loss', 'content': 0.0001375975989503786, 'timestamp': '2025-09-10 02:27:08.189861', 'step': 5320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:08.221651', 'step': 5320, 'epoch': 3} {'type': 'loss', 'content': 0.00034185725962743163, 'timestamp': '2025-09-10 02:27:08.229769', 'step': 5321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:08.262367', 'step': 5321, 'epoch': 3} {'type': 'loss', 'content': 4.766142592416145e-05, 'timestamp': '2025-09-10 02:27:08.266626', 'step': 5322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:08.297298', 'step': 5322, 'epoch': 3} {'type': 'loss', 'content': 0.00020192751253489405, 'timestamp': '2025-09-10 02:27:08.304114', 'step': 5323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:08.334710', 'step': 5323, 'epoch': 3} {'type': 'loss', 'content': 0.00017714654677547514, 'timestamp': '2025-09-10 02:27:08.362803', 'step': 5324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:08.393965', 'step': 5324, 'epoch': 3} {'type': 'loss', 'content': 0.0014676746213808656, 'timestamp': '2025-09-10 02:27:08.399243', 'step': 5325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:08.432061', 'step': 5325, 'epoch': 3} {'type': 'loss', 'content': 0.0007701607537455857, 'timestamp': '2025-09-10 02:27:08.442083', 'step': 5326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:27:08.485836', 'step': 5326, 'epoch': 3} {'type': 'loss', 'content': 0.0003435301478020847, 'timestamp': '2025-09-10 02:27:08.499222', 'step': 5327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:08.533247', 'step': 5327, 'epoch': 3} {'type': 'loss', 'content': 8.765466918703169e-05, 'timestamp': '2025-09-10 02:27:08.565826', 'step': 5328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:08.596972', 'step': 5328, 'epoch': 3} {'type': 'loss', 'content': 0.00012318461085669696, 'timestamp': '2025-09-10 02:27:08.599247', 'step': 5329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:27:08.633232', 'step': 5329, 'epoch': 3} {'type': 'loss', 'content': 0.0016483607469126582, 'timestamp': '2025-09-10 02:27:08.646899', 'step': 5330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:08.677609', 'step': 5330, 'epoch': 3} {'type': 'loss', 'content': 0.0008386906119994819, 'timestamp': '2025-09-10 02:27:08.684996', 'step': 5331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:08.718082', 'step': 5331, 'epoch': 3} {'type': 'loss', 'content': 4.022822031402029e-05, 'timestamp': '2025-09-10 02:27:08.746436', 'step': 5332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:08.779520', 'step': 5332, 'epoch': 3} {'type': 'loss', 'content': 0.04045605659484863, 'timestamp': '2025-09-10 02:27:08.784700', 'step': 5333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:08.821111', 'step': 5333, 'epoch': 3} {'type': 'loss', 'content': 8.697760495124385e-05, 'timestamp': '2025-09-10 02:27:08.830598', 'step': 5334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:08.863455', 'step': 5334, 'epoch': 3} {'type': 'loss', 'content': 0.0010809339582920074, 'timestamp': '2025-09-10 02:27:08.874263', 'step': 5335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:08.905338', 'step': 5335, 'epoch': 3} {'type': 'loss', 'content': 4.760713272844441e-05, 'timestamp': '2025-09-10 02:27:08.931014', 'step': 5336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:08.966761', 'step': 5336, 'epoch': 3} {'type': 'loss', 'content': 5.413155668065883e-05, 'timestamp': '2025-09-10 02:27:08.972108', 'step': 5337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:09.005498', 'step': 5337, 'epoch': 3} {'type': 'loss', 'content': 0.0001805043575586751, 'timestamp': '2025-09-10 02:27:09.009564', 'step': 5338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:09.043427', 'step': 5338, 'epoch': 3} {'type': 'loss', 'content': 0.0475006178021431, 'timestamp': '2025-09-10 02:27:09.050181', 'step': 5339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:09.085556', 'step': 5339, 'epoch': 3} {'type': 'loss', 'content': 0.0001535638002678752, 'timestamp': '2025-09-10 02:27:09.110509', 'step': 5340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:09.141774', 'step': 5340, 'epoch': 3} {'type': 'loss', 'content': 0.0067802309058606625, 'timestamp': '2025-09-10 02:27:09.146917', 'step': 5341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:09.186897', 'step': 5341, 'epoch': 3} {'type': 'loss', 'content': 0.02059916779398918, 'timestamp': '2025-09-10 02:27:09.195909', 'step': 5342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:09.228865', 'step': 5342, 'epoch': 3} {'type': 'loss', 'content': 0.00022535616881214082, 'timestamp': '2025-09-10 02:27:09.235276', 'step': 5343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:09.270492', 'step': 5343, 'epoch': 3} {'type': 'loss', 'content': 0.0002255578147014603, 'timestamp': '2025-09-10 02:27:09.301963', 'step': 5344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:09.338400', 'step': 5344, 'epoch': 3} {'type': 'loss', 'content': 8.521532436134294e-05, 'timestamp': '2025-09-10 02:27:09.341414', 'step': 5345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:09.380109', 'step': 5345, 'epoch': 3} {'type': 'loss', 'content': 5.905913349124603e-05, 'timestamp': '2025-09-10 02:27:09.386648', 'step': 5346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:09.421685', 'step': 5346, 'epoch': 3} {'type': 'loss', 'content': 0.0005463002598844469, 'timestamp': '2025-09-10 02:27:09.428072', 'step': 5347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:09.461412', 'step': 5347, 'epoch': 3} {'type': 'loss', 'content': 0.00023405032698065042, 'timestamp': '2025-09-10 02:27:09.488946', 'step': 5348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:09.524728', 'step': 5348, 'epoch': 3} {'type': 'loss', 'content': 0.0009855531388893723, 'timestamp': '2025-09-10 02:27:09.528887', 'step': 5349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:09.560327', 'step': 5349, 'epoch': 3} {'type': 'loss', 'content': 0.0001291104854317382, 'timestamp': '2025-09-10 02:27:09.572055', 'step': 5350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:09.603665', 'step': 5350, 'epoch': 3} {'type': 'loss', 'content': 0.0029408042319118977, 'timestamp': '2025-09-10 02:27:09.610845', 'step': 5351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:09.641288', 'step': 5351, 'epoch': 3} {'type': 'loss', 'content': 0.007272699382156134, 'timestamp': '2025-09-10 02:27:09.665079', 'step': 5352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:09.696094', 'step': 5352, 'epoch': 3} {'type': 'loss', 'content': 0.00021549421944655478, 'timestamp': '2025-09-10 02:27:09.700554', 'step': 5353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:09.731962', 'step': 5353, 'epoch': 3} {'type': 'loss', 'content': 0.00029168991022743285, 'timestamp': '2025-09-10 02:27:09.744166', 'step': 5354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:09.777081', 'step': 5354, 'epoch': 3} {'type': 'loss', 'content': 0.0001440942141925916, 'timestamp': '2025-09-10 02:27:09.789247', 'step': 5355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:09.820693', 'step': 5355, 'epoch': 3} {'type': 'loss', 'content': 0.0006720181554555893, 'timestamp': '2025-09-10 02:27:09.848205', 'step': 5356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:09.881714', 'step': 5356, 'epoch': 3} {'type': 'loss', 'content': 0.0011303488863632083, 'timestamp': '2025-09-10 02:27:09.886340', 'step': 5357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:09.918098', 'step': 5357, 'epoch': 3} {'type': 'loss', 'content': 0.0002597134152892977, 'timestamp': '2025-09-10 02:27:09.929734', 'step': 5358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:09.963169', 'step': 5358, 'epoch': 3} {'type': 'loss', 'content': 0.00024505850160494447, 'timestamp': '2025-09-10 02:27:09.967403', 'step': 5359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:09.998959', 'step': 5359, 'epoch': 3} {'type': 'loss', 'content': 0.0003296424984000623, 'timestamp': '2025-09-10 02:27:10.024044', 'step': 5360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:10.055241', 'step': 5360, 'epoch': 3} {'type': 'loss', 'content': 0.0009543310734443367, 'timestamp': '2025-09-10 02:27:10.057851', 'step': 5361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:10.091168', 'step': 5361, 'epoch': 3} {'type': 'loss', 'content': 0.00012925105693284422, 'timestamp': '2025-09-10 02:27:10.098638', 'step': 5362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:10.130346', 'step': 5362, 'epoch': 3} {'type': 'loss', 'content': 0.00022506927780341357, 'timestamp': '2025-09-10 02:27:10.137015', 'step': 5363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:27:10.175004', 'step': 5363, 'epoch': 3} {'type': 'loss', 'content': 0.00027559816953726113, 'timestamp': '2025-09-10 02:27:10.211508', 'step': 5364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:10.243345', 'step': 5364, 'epoch': 3} {'type': 'loss', 'content': 7.342889148276299e-05, 'timestamp': '2025-09-10 02:27:10.247674', 'step': 5365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:10.279417', 'step': 5365, 'epoch': 3} {'type': 'loss', 'content': 0.0005296029266901314, 'timestamp': '2025-09-10 02:27:10.286937', 'step': 5366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:27:10.321567', 'step': 5366, 'epoch': 3} {'type': 'loss', 'content': 0.00015757219807710499, 'timestamp': '2025-09-10 02:27:10.335385', 'step': 5367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:10.367196', 'step': 5367, 'epoch': 3} {'type': 'loss', 'content': 0.003111243713647127, 'timestamp': '2025-09-10 02:27:10.394588', 'step': 5368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:10.427238', 'step': 5368, 'epoch': 3} {'type': 'loss', 'content': 0.0011776711326092482, 'timestamp': '2025-09-10 02:27:10.433995', 'step': 5369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:10.465234', 'step': 5369, 'epoch': 3} {'type': 'loss', 'content': 0.00025284269941039383, 'timestamp': '2025-09-10 02:27:10.477006', 'step': 5370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:10.508081', 'step': 5370, 'epoch': 3} {'type': 'loss', 'content': 0.0006649411516264081, 'timestamp': '2025-09-10 02:27:10.520422', 'step': 5371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:10.551453', 'step': 5371, 'epoch': 3} {'type': 'loss', 'content': 0.001548093743622303, 'timestamp': '2025-09-10 02:27:10.579634', 'step': 5372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:10.611150', 'step': 5372, 'epoch': 3} {'type': 'loss', 'content': 0.0003271247842349112, 'timestamp': '2025-09-10 02:27:10.615775', 'step': 5373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:10.647350', 'step': 5373, 'epoch': 3} {'type': 'loss', 'content': 0.005300581920892, 'timestamp': '2025-09-10 02:27:10.654716', 'step': 5374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:10.687559', 'step': 5374, 'epoch': 3} {'type': 'loss', 'content': 0.0005442704423330724, 'timestamp': '2025-09-10 02:27:10.694191', 'step': 5375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:10.727002', 'step': 5375, 'epoch': 3} {'type': 'loss', 'content': 0.0001799498131731525, 'timestamp': '2025-09-10 02:27:10.754945', 'step': 5376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:10.788594', 'step': 5376, 'epoch': 3} {'type': 'loss', 'content': 0.0002708194369915873, 'timestamp': '2025-09-10 02:27:10.793486', 'step': 5377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:10.825847', 'step': 5377, 'epoch': 3} {'type': 'loss', 'content': 0.004976264201104641, 'timestamp': '2025-09-10 02:27:10.837370', 'step': 5378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:10.868671', 'step': 5378, 'epoch': 3} {'type': 'loss', 'content': 0.00040135084418579936, 'timestamp': '2025-09-10 02:27:10.875444', 'step': 5379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:10.909459', 'step': 5379, 'epoch': 3} {'type': 'loss', 'content': 0.0001543848484288901, 'timestamp': '2025-09-10 02:27:10.934664', 'step': 5380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:10.967418', 'step': 5380, 'epoch': 3} {'type': 'loss', 'content': 0.021023396402597427, 'timestamp': '2025-09-10 02:27:10.974542', 'step': 5381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:11.006146', 'step': 5381, 'epoch': 3} {'type': 'loss', 'content': 0.0035028725396841764, 'timestamp': '2025-09-10 02:27:11.009911', 'step': 5382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:11.041403', 'step': 5382, 'epoch': 3} {'type': 'loss', 'content': 0.0032905121333897114, 'timestamp': '2025-09-10 02:27:11.048083', 'step': 5383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:11.079922', 'step': 5383, 'epoch': 3} {'type': 'loss', 'content': 0.0008346849936060607, 'timestamp': '2025-09-10 02:27:11.108219', 'step': 5384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:11.139874', 'step': 5384, 'epoch': 3} {'type': 'loss', 'content': 0.00020959800167474896, 'timestamp': '2025-09-10 02:27:11.144722', 'step': 5385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:11.176522', 'step': 5385, 'epoch': 3} {'type': 'loss', 'content': 8.455058559775352e-05, 'timestamp': '2025-09-10 02:27:11.186299', 'step': 5386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:11.217775', 'step': 5386, 'epoch': 3} {'type': 'loss', 'content': 0.00021998195734340698, 'timestamp': '2025-09-10 02:27:11.224526', 'step': 5387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:27:11.264673', 'step': 5387, 'epoch': 3} {'type': 'loss', 'content': 0.021652230992913246, 'timestamp': '2025-09-10 02:27:11.301731', 'step': 5388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:11.334326', 'step': 5388, 'epoch': 3} {'type': 'loss', 'content': 0.0016941409558057785, 'timestamp': '2025-09-10 02:27:11.338505', 'step': 5389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:11.369358', 'step': 5389, 'epoch': 3} {'type': 'loss', 'content': 0.0002850510645657778, 'timestamp': '2025-09-10 02:27:11.372473', 'step': 5390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:11.403900', 'step': 5390, 'epoch': 3} {'type': 'loss', 'content': 0.0029228352941572666, 'timestamp': '2025-09-10 02:27:11.410340', 'step': 5391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:11.441958', 'step': 5391, 'epoch': 3} {'type': 'loss', 'content': 0.01626124419271946, 'timestamp': '2025-09-10 02:27:11.469556', 'step': 5392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:11.503536', 'step': 5392, 'epoch': 3} {'type': 'loss', 'content': 0.001065724529325962, 'timestamp': '2025-09-10 02:27:11.512032', 'step': 5393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:27:11.549882', 'step': 5393, 'epoch': 3} {'type': 'loss', 'content': 0.0003987684322055429, 'timestamp': '2025-09-10 02:27:11.563574', 'step': 5394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:11.595440', 'step': 5394, 'epoch': 3} {'type': 'loss', 'content': 0.0007257405668497086, 'timestamp': '2025-09-10 02:27:11.599672', 'step': 5395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:11.631068', 'step': 5395, 'epoch': 3} {'type': 'loss', 'content': 0.003991567995399237, 'timestamp': '2025-09-10 02:27:11.658897', 'step': 5396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:11.689820', 'step': 5396, 'epoch': 3} {'type': 'loss', 'content': 0.001748523791320622, 'timestamp': '2025-09-10 02:27:11.695212', 'step': 5397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:11.728539', 'step': 5397, 'epoch': 3} {'type': 'loss', 'content': 0.0017363729421049356, 'timestamp': '2025-09-10 02:27:11.735372', 'step': 5398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:27:11.774608', 'step': 5398, 'epoch': 3} {'type': 'loss', 'content': 0.0008585135801695287, 'timestamp': '2025-09-10 02:27:11.790251', 'step': 5399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:11.821849', 'step': 5399, 'epoch': 3} {'type': 'loss', 'content': 0.00017365036183036864, 'timestamp': '2025-09-10 02:27:11.850439', 'step': 5400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:11.881998', 'step': 5400, 'epoch': 3} {'type': 'loss', 'content': 0.0003011637891177088, 'timestamp': '2025-09-10 02:27:11.886504', 'step': 5401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:11.917983', 'step': 5401, 'epoch': 3} {'type': 'loss', 'content': 0.03174243122339249, 'timestamp': '2025-09-10 02:27:11.925005', 'step': 5402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:11.956591', 'step': 5402, 'epoch': 3} {'type': 'loss', 'content': 0.00016650068573653698, 'timestamp': '2025-09-10 02:27:11.963318', 'step': 5403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:11.995075', 'step': 5403, 'epoch': 3} {'type': 'loss', 'content': 0.0002632165269460529, 'timestamp': '2025-09-10 02:27:12.027740', 'step': 5404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:12.059862', 'step': 5404, 'epoch': 3} {'type': 'loss', 'content': 7.19372255844064e-05, 'timestamp': '2025-09-10 02:27:12.063799', 'step': 5405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:12.096073', 'step': 5405, 'epoch': 3} {'type': 'loss', 'content': 0.0001446278765797615, 'timestamp': '2025-09-10 02:27:12.102727', 'step': 5406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:12.134862', 'step': 5406, 'epoch': 3} {'type': 'loss', 'content': 0.0006326402653940022, 'timestamp': '2025-09-10 02:27:12.141586', 'step': 5407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:12.174294', 'step': 5407, 'epoch': 3} {'type': 'loss', 'content': 9.538559970678762e-05, 'timestamp': '2025-09-10 02:27:12.202505', 'step': 5408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:12.234243', 'step': 5408, 'epoch': 3} {'type': 'loss', 'content': 0.002074205782264471, 'timestamp': '2025-09-10 02:27:12.238921', 'step': 5409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:27:12.278081', 'step': 5409, 'epoch': 3} {'type': 'loss', 'content': 0.004352888558059931, 'timestamp': '2025-09-10 02:27:12.294251', 'step': 5410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:12.325875', 'step': 5410, 'epoch': 3} {'type': 'loss', 'content': 0.0003916619752999395, 'timestamp': '2025-09-10 02:27:12.336292', 'step': 5411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:12.367817', 'step': 5411, 'epoch': 3} {'type': 'loss', 'content': 6.0024420236004516e-05, 'timestamp': '2025-09-10 02:27:12.395910', 'step': 5412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:12.427527', 'step': 5412, 'epoch': 3} {'type': 'loss', 'content': 0.0003417480329517275, 'timestamp': '2025-09-10 02:27:12.432346', 'step': 5413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:12.464295', 'step': 5413, 'epoch': 3} {'type': 'loss', 'content': 0.04961821064352989, 'timestamp': '2025-09-10 02:27:12.470898', 'step': 5414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:12.503543', 'step': 5414, 'epoch': 3} {'type': 'loss', 'content': 0.000651273294351995, 'timestamp': '2025-09-10 02:27:12.515505', 'step': 5415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:12.547695', 'step': 5415, 'epoch': 3} {'type': 'loss', 'content': 0.0005461532273329794, 'timestamp': '2025-09-10 02:27:12.574876', 'step': 5416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:12.609568', 'step': 5416, 'epoch': 3} {'type': 'loss', 'content': 0.00011130100028822199, 'timestamp': '2025-09-10 02:27:12.617143', 'step': 5417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:12.648604', 'step': 5417, 'epoch': 3} {'type': 'loss', 'content': 0.004985901992768049, 'timestamp': '2025-09-10 02:27:12.655668', 'step': 5418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:12.687344', 'step': 5418, 'epoch': 3} {'type': 'loss', 'content': 0.00047179448301903903, 'timestamp': '2025-09-10 02:27:12.699601', 'step': 5419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:12.732923', 'step': 5419, 'epoch': 3} {'type': 'loss', 'content': 0.04157213494181633, 'timestamp': '2025-09-10 02:27:12.757955', 'step': 5420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:12.789802', 'step': 5420, 'epoch': 3} {'type': 'loss', 'content': 0.000143712037242949, 'timestamp': '2025-09-10 02:27:12.799045', 'step': 5421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:27:12.832584', 'step': 5421, 'epoch': 3} {'type': 'loss', 'content': 0.0028550736606121063, 'timestamp': '2025-09-10 02:27:12.845910', 'step': 5422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:12.878184', 'step': 5422, 'epoch': 3} {'type': 'loss', 'content': 0.0014410755829885602, 'timestamp': '2025-09-10 02:27:12.885163', 'step': 5423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:12.917557', 'step': 5423, 'epoch': 3} {'type': 'loss', 'content': 0.00013774879334960133, 'timestamp': '2025-09-10 02:27:12.941980', 'step': 5424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:27:12.974130', 'step': 5424, 'epoch': 3} {'type': 'loss', 'content': 0.0003147267270833254, 'timestamp': '2025-09-10 02:27:12.986753', 'step': 5425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:13.019181', 'step': 5425, 'epoch': 3} {'type': 'loss', 'content': 0.0036901801358908415, 'timestamp': '2025-09-10 02:27:13.021451', 'step': 5426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:13.052834', 'step': 5426, 'epoch': 3} {'type': 'loss', 'content': 9.131115803029388e-05, 'timestamp': '2025-09-10 02:27:13.062813', 'step': 5427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:13.094571', 'step': 5427, 'epoch': 3} {'type': 'loss', 'content': 0.00034638322540558875, 'timestamp': '2025-09-10 02:27:13.121982', 'step': 5428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:13.152789', 'step': 5428, 'epoch': 3} {'type': 'loss', 'content': 0.0003097867302130908, 'timestamp': '2025-09-10 02:27:13.157434', 'step': 5429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:13.188718', 'step': 5429, 'epoch': 3} {'type': 'loss', 'content': 0.0006205638055689633, 'timestamp': '2025-09-10 02:27:13.198938', 'step': 5430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:27:13.234382', 'step': 5430, 'epoch': 3} {'type': 'loss', 'content': 0.004306466784328222, 'timestamp': '2025-09-10 02:27:13.248054', 'step': 5431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:13.280356', 'step': 5431, 'epoch': 3} {'type': 'loss', 'content': 0.0013333893148228526, 'timestamp': '2025-09-10 02:27:13.311264', 'step': 5432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:27:13.348541', 'step': 5432, 'epoch': 3} {'type': 'loss', 'content': 0.003753043944016099, 'timestamp': '2025-09-10 02:27:13.363655', 'step': 5433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:13.395676', 'step': 5433, 'epoch': 3} {'type': 'loss', 'content': 0.00018160228501074016, 'timestamp': '2025-09-10 02:27:13.406258', 'step': 5434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:13.437458', 'step': 5434, 'epoch': 3} {'type': 'loss', 'content': 0.0016118159983307123, 'timestamp': '2025-09-10 02:27:13.444359', 'step': 5435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:13.475084', 'step': 5435, 'epoch': 3} {'type': 'loss', 'content': 0.00042566441697999835, 'timestamp': '2025-09-10 02:27:13.506907', 'step': 5436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:13.538508', 'step': 5436, 'epoch': 3} {'type': 'loss', 'content': 0.0005374281900003552, 'timestamp': '2025-09-10 02:27:13.542905', 'step': 5437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:13.573793', 'step': 5437, 'epoch': 3} {'type': 'loss', 'content': 0.0007307881605811417, 'timestamp': '2025-09-10 02:27:13.580832', 'step': 5438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:13.611475', 'step': 5438, 'epoch': 3} {'type': 'loss', 'content': 0.0004055744793731719, 'timestamp': '2025-09-10 02:27:13.615784', 'step': 5439, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:27:23.615052', 'step': 5439, 'epoch': 3} {'type': 'pplx', 'content': 20426999.100602426, 'timestamp': '2025-09-10 02:27:23.619545', 'step': 5439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:23.651955', 'step': 5439, 'epoch': 3} {'type': 'loss', 'content': 0.00016575765039306134, 'timestamp': '2025-09-10 02:27:23.676033', 'step': 5440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:23.705982', 'step': 5440, 'epoch': 3} {'type': 'loss', 'content': 0.00043351706699468195, 'timestamp': '2025-09-10 02:27:23.707867', 'step': 5441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:23.736882', 'step': 5441, 'epoch': 3} {'type': 'loss', 'content': 0.0011763531947508454, 'timestamp': '2025-09-10 02:27:23.741399', 'step': 5442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:27:23.779772', 'step': 5442, 'epoch': 3} {'type': 'loss', 'content': 0.0002184472105000168, 'timestamp': '2025-09-10 02:27:23.795688', 'step': 5443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:23.826560', 'step': 5443, 'epoch': 3} {'type': 'loss', 'content': 0.006947829853743315, 'timestamp': '2025-09-10 02:27:23.853998', 'step': 5444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:23.884265', 'step': 5444, 'epoch': 3} {'type': 'loss', 'content': 0.00013986548583488911, 'timestamp': '2025-09-10 02:27:23.894618', 'step': 5445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:23.925075', 'step': 5445, 'epoch': 3} {'type': 'loss', 'content': 0.001293918932788074, 'timestamp': '2025-09-10 02:27:23.932472', 'step': 5446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:23.962245', 'step': 5446, 'epoch': 3} {'type': 'loss', 'content': 0.0006181861972436309, 'timestamp': '2025-09-10 02:27:23.973063', 'step': 5447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:24.002436', 'step': 5447, 'epoch': 3} {'type': 'loss', 'content': 0.000459133880212903, 'timestamp': '2025-09-10 02:27:24.027256', 'step': 5448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:24.057967', 'step': 5448, 'epoch': 3} {'type': 'loss', 'content': 0.009505641646683216, 'timestamp': '2025-09-10 02:27:24.062630', 'step': 5449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:27:24.095581', 'step': 5449, 'epoch': 3} {'type': 'loss', 'content': 0.0008678924641571939, 'timestamp': '2025-09-10 02:27:24.108930', 'step': 5450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:27:24.147699', 'step': 5450, 'epoch': 3} {'type': 'loss', 'content': 0.0002460898831486702, 'timestamp': '2025-09-10 02:27:24.163384', 'step': 5451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:24.193907', 'step': 5451, 'epoch': 3} {'type': 'loss', 'content': 0.0009736836655065417, 'timestamp': '2025-09-10 02:27:24.222218', 'step': 5452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:24.251808', 'step': 5452, 'epoch': 3} {'type': 'loss', 'content': 0.0001476912439102307, 'timestamp': '2025-09-10 02:27:24.257124', 'step': 5453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:24.287170', 'step': 5453, 'epoch': 3} {'type': 'loss', 'content': 0.00234299642033875, 'timestamp': '2025-09-10 02:27:24.289140', 'step': 5454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:24.319252', 'step': 5454, 'epoch': 3} {'type': 'loss', 'content': 0.0003727012954186648, 'timestamp': '2025-09-10 02:27:24.323506', 'step': 5455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:24.353122', 'step': 5455, 'epoch': 3} {'type': 'loss', 'content': 0.00016866849910002202, 'timestamp': '2025-09-10 02:27:24.380665', 'step': 5456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:24.411019', 'step': 5456, 'epoch': 3} {'type': 'loss', 'content': 0.00030947296181693673, 'timestamp': '2025-09-10 02:27:24.413046', 'step': 5457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:24.443865', 'step': 5457, 'epoch': 3} {'type': 'loss', 'content': 0.0004413680580910295, 'timestamp': '2025-09-10 02:27:24.448059', 'step': 5458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:24.481001', 'step': 5458, 'epoch': 3} {'type': 'loss', 'content': 0.00032807476236484945, 'timestamp': '2025-09-10 02:27:24.485453', 'step': 5459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:24.515733', 'step': 5459, 'epoch': 3} {'type': 'loss', 'content': 0.00025154382456094027, 'timestamp': '2025-09-10 02:27:24.549197', 'step': 5460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:24.585862', 'step': 5460, 'epoch': 3} {'type': 'loss', 'content': 0.001103718881495297, 'timestamp': '2025-09-10 02:27:24.590640', 'step': 5461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:24.621484', 'step': 5461, 'epoch': 3} {'type': 'loss', 'content': 0.0004202440322842449, 'timestamp': '2025-09-10 02:27:24.631686', 'step': 5462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:24.662057', 'step': 5462, 'epoch': 3} {'type': 'loss', 'content': 0.0002868052397388965, 'timestamp': '2025-09-10 02:27:24.669186', 'step': 5463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:27:24.704523', 'step': 5463, 'epoch': 3} {'type': 'loss', 'content': 0.0005337927141226828, 'timestamp': '2025-09-10 02:27:24.738823', 'step': 5464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:24.773003', 'step': 5464, 'epoch': 3} {'type': 'loss', 'content': 0.000878959137480706, 'timestamp': '2025-09-10 02:27:24.777704', 'step': 5465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:24.807633', 'step': 5465, 'epoch': 3} {'type': 'loss', 'content': 0.00041015897295437753, 'timestamp': '2025-09-10 02:27:24.814729', 'step': 5466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:24.845250', 'step': 5466, 'epoch': 3} {'type': 'loss', 'content': 0.008768526837229729, 'timestamp': '2025-09-10 02:27:24.856091', 'step': 5467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:24.886823', 'step': 5467, 'epoch': 3} {'type': 'loss', 'content': 0.00015605808584950864, 'timestamp': '2025-09-10 02:27:24.918482', 'step': 5468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:24.948828', 'step': 5468, 'epoch': 3} {'type': 'loss', 'content': 0.00048326136311516166, 'timestamp': '2025-09-10 02:27:24.954048', 'step': 5469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:24.983806', 'step': 5469, 'epoch': 3} {'type': 'loss', 'content': 0.0007095988839864731, 'timestamp': '2025-09-10 02:27:24.988163', 'step': 5470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:25.018745', 'step': 5470, 'epoch': 3} {'type': 'loss', 'content': 0.0030129605438560247, 'timestamp': '2025-09-10 02:27:25.026198', 'step': 5471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:27:25.068239', 'step': 5471, 'epoch': 3} {'type': 'loss', 'content': 0.008913605473935604, 'timestamp': '2025-09-10 02:27:25.106488', 'step': 5472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:25.137125', 'step': 5472, 'epoch': 3} {'type': 'loss', 'content': 0.0003532721020746976, 'timestamp': '2025-09-10 02:27:25.147566', 'step': 5473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:25.177878', 'step': 5473, 'epoch': 3} {'type': 'loss', 'content': 0.0001848796382546425, 'timestamp': '2025-09-10 02:27:25.184808', 'step': 5474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:25.214781', 'step': 5474, 'epoch': 3} {'type': 'loss', 'content': 0.0004907096736133099, 'timestamp': '2025-09-10 02:27:25.221615', 'step': 5475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:25.252159', 'step': 5475, 'epoch': 3} {'type': 'loss', 'content': 0.0004714152601081878, 'timestamp': '2025-09-10 02:27:25.285615', 'step': 5476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:27:25.318176', 'step': 5476, 'epoch': 3} {'type': 'loss', 'content': 0.0014040955575183034, 'timestamp': '2025-09-10 02:27:25.331456', 'step': 5477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:25.361847', 'step': 5477, 'epoch': 3} {'type': 'loss', 'content': 0.0022547480184584856, 'timestamp': '2025-09-10 02:27:25.368579', 'step': 5478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:25.399396', 'step': 5478, 'epoch': 3} {'type': 'loss', 'content': 0.0004808087833225727, 'timestamp': '2025-09-10 02:27:25.406197', 'step': 5479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:25.436828', 'step': 5479, 'epoch': 3} {'type': 'loss', 'content': 0.00024939377908594906, 'timestamp': '2025-09-10 02:27:25.464613', 'step': 5480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:27:25.500516', 'step': 5480, 'epoch': 3} {'type': 'loss', 'content': 0.0003376358072273433, 'timestamp': '2025-09-10 02:27:25.515696', 'step': 5481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:25.545597', 'step': 5481, 'epoch': 3} {'type': 'loss', 'content': 0.00017180813301820308, 'timestamp': '2025-09-10 02:27:25.552497', 'step': 5482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:25.592196', 'step': 5482, 'epoch': 3} {'type': 'loss', 'content': 0.00040549953700974584, 'timestamp': '2025-09-10 02:27:25.594957', 'step': 5483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:25.625545', 'step': 5483, 'epoch': 3} {'type': 'loss', 'content': 0.0006368904723785818, 'timestamp': '2025-09-10 02:27:25.658634', 'step': 5484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:25.689629', 'step': 5484, 'epoch': 3} {'type': 'loss', 'content': 0.00018317217472940683, 'timestamp': '2025-09-10 02:27:25.694781', 'step': 5485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:25.724875', 'step': 5485, 'epoch': 3} {'type': 'loss', 'content': 0.0001097510103136301, 'timestamp': '2025-09-10 02:27:25.728890', 'step': 5486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:25.760354', 'step': 5486, 'epoch': 3} {'type': 'loss', 'content': 0.0001433340657968074, 'timestamp': '2025-09-10 02:27:25.772468', 'step': 5487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:25.802978', 'step': 5487, 'epoch': 3} {'type': 'loss', 'content': 0.0005603828467428684, 'timestamp': '2025-09-10 02:27:25.831313', 'step': 5488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:25.861933', 'step': 5488, 'epoch': 3} {'type': 'loss', 'content': 0.0009804172441363335, 'timestamp': '2025-09-10 02:27:25.867054', 'step': 5489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:25.896960', 'step': 5489, 'epoch': 3} {'type': 'loss', 'content': 0.029433060437440872, 'timestamp': '2025-09-10 02:27:25.901378', 'step': 5490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:25.932233', 'step': 5490, 'epoch': 3} {'type': 'loss', 'content': 0.0031838512513786554, 'timestamp': '2025-09-10 02:27:25.938904', 'step': 5491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:25.971123', 'step': 5491, 'epoch': 3} {'type': 'loss', 'content': 0.0012118567246943712, 'timestamp': '2025-09-10 02:27:26.003044', 'step': 5492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:26.033688', 'step': 5492, 'epoch': 3} {'type': 'loss', 'content': 7.553322211606428e-05, 'timestamp': '2025-09-10 02:27:26.038348', 'step': 5493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:26.068370', 'step': 5493, 'epoch': 3} {'type': 'loss', 'content': 0.0006994387367740273, 'timestamp': '2025-09-10 02:27:26.075270', 'step': 5494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:26.105413', 'step': 5494, 'epoch': 3} {'type': 'loss', 'content': 0.00013065806706435978, 'timestamp': '2025-09-10 02:27:26.113048', 'step': 5495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:27:26.147661', 'step': 5495, 'epoch': 3} {'type': 'loss', 'content': 0.0004590437456499785, 'timestamp': '2025-09-10 02:27:26.182277', 'step': 5496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:26.214107', 'step': 5496, 'epoch': 3} {'type': 'loss', 'content': 0.00011019222438335419, 'timestamp': '2025-09-10 02:27:26.218755', 'step': 5497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:27:26.256942', 'step': 5497, 'epoch': 3} {'type': 'loss', 'content': 0.0003960966714657843, 'timestamp': '2025-09-10 02:27:26.272845', 'step': 5498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:26.303273', 'step': 5498, 'epoch': 3} {'type': 'loss', 'content': 0.0002474244683980942, 'timestamp': '2025-09-10 02:27:26.315523', 'step': 5499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:27:26.352576', 'step': 5499, 'epoch': 3} {'type': 'loss', 'content': 0.017456304281949997, 'timestamp': '2025-09-10 02:27:26.389120', 'step': 5500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 5500', 'timestamp': '2025-09-10 02:27:31.536850', 'step': 5500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:31.579077', 'step': 5500, 'epoch': 3} {'type': 'loss', 'content': 0.002106861909851432, 'timestamp': '2025-09-10 02:27:31.582270', 'step': 5501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:27:31.629193', 'step': 5501, 'epoch': 3} {'type': 'loss', 'content': 0.00021317604114301503, 'timestamp': '2025-09-10 02:27:31.642886', 'step': 5502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:31.674071', 'step': 5502, 'epoch': 3} {'type': 'loss', 'content': 0.00021606599329970777, 'timestamp': '2025-09-10 02:27:31.680617', 'step': 5503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:31.710421', 'step': 5503, 'epoch': 3} {'type': 'loss', 'content': 0.0007330483640544116, 'timestamp': '2025-09-10 02:27:31.737988', 'step': 5504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 15662185694400}, 'timestamp': '2025-09-10 02:27:31.781738', 'step': 5504, 'epoch': 3} {'type': 'loss', 'content': 0.000325193686876446, 'timestamp': '2025-09-10 02:27:31.800720', 'step': 5505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:31.833182', 'step': 5505, 'epoch': 3} {'type': 'loss', 'content': 0.0002977077674586326, 'timestamp': '2025-09-10 02:27:31.840308', 'step': 5506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:31.871361', 'step': 5506, 'epoch': 3} {'type': 'loss', 'content': 0.0009078510920517147, 'timestamp': '2025-09-10 02:27:31.878302', 'step': 5507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:31.909839', 'step': 5507, 'epoch': 3} {'type': 'loss', 'content': 0.005771205294877291, 'timestamp': '2025-09-10 02:27:31.938113', 'step': 5508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:31.968598', 'step': 5508, 'epoch': 3} {'type': 'loss', 'content': 5.6392182159470394e-05, 'timestamp': '2025-09-10 02:27:31.972981', 'step': 5509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:27:32.006458', 'step': 5509, 'epoch': 3} {'type': 'loss', 'content': 0.0001654118241276592, 'timestamp': '2025-09-10 02:27:32.019865', 'step': 5510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:27:32.059249', 'step': 5510, 'epoch': 3} {'type': 'loss', 'content': 0.0002515481901355088, 'timestamp': '2025-09-10 02:27:32.072881', 'step': 5511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:32.104293', 'step': 5511, 'epoch': 3} {'type': 'loss', 'content': 0.0001816750009311363, 'timestamp': '2025-09-10 02:27:32.132676', 'step': 5512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:32.162778', 'step': 5512, 'epoch': 3} {'type': 'loss', 'content': 0.00022820988669991493, 'timestamp': '2025-09-10 02:27:32.164857', 'step': 5513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:32.194948', 'step': 5513, 'epoch': 3} {'type': 'loss', 'content': 0.0005616779671981931, 'timestamp': '2025-09-10 02:27:32.197489', 'step': 5514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:32.227397', 'step': 5514, 'epoch': 3} {'type': 'loss', 'content': 7.715394895058125e-05, 'timestamp': '2025-09-10 02:27:32.229522', 'step': 5515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:32.259843', 'step': 5515, 'epoch': 3} {'type': 'loss', 'content': 0.003506176406517625, 'timestamp': '2025-09-10 02:27:32.293269', 'step': 5516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:32.323168', 'step': 5516, 'epoch': 3} {'type': 'loss', 'content': 0.0001584869751241058, 'timestamp': '2025-09-10 02:27:32.325243', 'step': 5517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:27:32.358806', 'step': 5517, 'epoch': 3} {'type': 'loss', 'content': 0.0006221079966053367, 'timestamp': '2025-09-10 02:27:32.372521', 'step': 5518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:32.403453', 'step': 5518, 'epoch': 3} {'type': 'loss', 'content': 4.248010372975841e-05, 'timestamp': '2025-09-10 02:27:32.410382', 'step': 5519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:32.440771', 'step': 5519, 'epoch': 3} {'type': 'loss', 'content': 0.0002929776383098215, 'timestamp': '2025-09-10 02:27:32.472429', 'step': 5520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:32.506997', 'step': 5520, 'epoch': 3} {'type': 'loss', 'content': 0.0004986113053746521, 'timestamp': '2025-09-10 02:27:32.508976', 'step': 5521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:32.539000', 'step': 5521, 'epoch': 3} {'type': 'loss', 'content': 0.00017840255168266594, 'timestamp': '2025-09-10 02:27:32.545758', 'step': 5522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:32.578145', 'step': 5522, 'epoch': 3} {'type': 'loss', 'content': 7.241130515467376e-05, 'timestamp': '2025-09-10 02:27:32.584978', 'step': 5523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:32.618132', 'step': 5523, 'epoch': 3} {'type': 'loss', 'content': 0.0008913822821341455, 'timestamp': '2025-09-10 02:27:32.641737', 'step': 5524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:32.672034', 'step': 5524, 'epoch': 3} {'type': 'loss', 'content': 0.00020360689086373895, 'timestamp': '2025-09-10 02:27:32.677021', 'step': 5525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:27:32.710737', 'step': 5525, 'epoch': 3} {'type': 'loss', 'content': 0.00012081407476216555, 'timestamp': '2025-09-10 02:27:32.724427', 'step': 5526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:32.754305', 'step': 5526, 'epoch': 3} {'type': 'loss', 'content': 0.0008983220905065536, 'timestamp': '2025-09-10 02:27:32.761358', 'step': 5527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:32.791412', 'step': 5527, 'epoch': 3} {'type': 'loss', 'content': 0.00018443951557856053, 'timestamp': '2025-09-10 02:27:32.816320', 'step': 5528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:32.847149', 'step': 5528, 'epoch': 3} {'type': 'loss', 'content': 0.00016453374701086432, 'timestamp': '2025-09-10 02:27:32.855023', 'step': 5529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:32.886789', 'step': 5529, 'epoch': 3} {'type': 'loss', 'content': 0.00020828154811169952, 'timestamp': '2025-09-10 02:27:32.897005', 'step': 5530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:27:32.932004', 'step': 5530, 'epoch': 3} {'type': 'loss', 'content': 0.0003416097315493971, 'timestamp': '2025-09-10 02:27:32.945746', 'step': 5531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:27:32.978891', 'step': 5531, 'epoch': 3} {'type': 'loss', 'content': 0.0005632195970974863, 'timestamp': '2025-09-10 02:27:33.013047', 'step': 5532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:33.044070', 'step': 5532, 'epoch': 3} {'type': 'loss', 'content': 0.005745100323110819, 'timestamp': '2025-09-10 02:27:33.051515', 'step': 5533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:33.084358', 'step': 5533, 'epoch': 3} {'type': 'loss', 'content': 0.000916880089789629, 'timestamp': '2025-09-10 02:27:33.088706', 'step': 5534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:33.118543', 'step': 5534, 'epoch': 3} {'type': 'loss', 'content': 0.00013511795259546489, 'timestamp': '2025-09-10 02:27:33.125244', 'step': 5535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:33.156478', 'step': 5535, 'epoch': 3} {'type': 'loss', 'content': 0.011048159562051296, 'timestamp': '2025-09-10 02:27:33.184951', 'step': 5536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:33.214401', 'step': 5536, 'epoch': 3} {'type': 'loss', 'content': 0.0008397336350753903, 'timestamp': '2025-09-10 02:27:33.216071', 'step': 5537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:33.245476', 'step': 5537, 'epoch': 3} {'type': 'loss', 'content': 0.0011787625262513757, 'timestamp': '2025-09-10 02:27:33.249811', 'step': 5538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:33.281454', 'step': 5538, 'epoch': 3} {'type': 'loss', 'content': 0.0012026058975607157, 'timestamp': '2025-09-10 02:27:33.291796', 'step': 5539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:33.321747', 'step': 5539, 'epoch': 3} {'type': 'loss', 'content': 0.03373594582080841, 'timestamp': '2025-09-10 02:27:33.346776', 'step': 5540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:33.376340', 'step': 5540, 'epoch': 3} {'type': 'loss', 'content': 0.00018737561185844243, 'timestamp': '2025-09-10 02:27:33.380012', 'step': 5541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:33.414380', 'step': 5541, 'epoch': 3} {'type': 'loss', 'content': 5.8746190916281193e-05, 'timestamp': '2025-09-10 02:27:33.421294', 'step': 5542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:33.451445', 'step': 5542, 'epoch': 3} {'type': 'loss', 'content': 0.03155756741762161, 'timestamp': '2025-09-10 02:27:33.463526', 'step': 5543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:33.494717', 'step': 5543, 'epoch': 3} {'type': 'loss', 'content': 0.0006670065922662616, 'timestamp': '2025-09-10 02:27:33.523226', 'step': 5544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:33.554422', 'step': 5544, 'epoch': 3} {'type': 'loss', 'content': 0.04387173801660538, 'timestamp': '2025-09-10 02:27:33.559135', 'step': 5545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:33.593139', 'step': 5545, 'epoch': 3} {'type': 'loss', 'content': 0.00019743894517887384, 'timestamp': '2025-09-10 02:27:33.605635', 'step': 5546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:33.639149', 'step': 5546, 'epoch': 3} {'type': 'loss', 'content': 6.449820648413152e-05, 'timestamp': '2025-09-10 02:27:33.648609', 'step': 5547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:33.680150', 'step': 5547, 'epoch': 3} {'type': 'loss', 'content': 0.011188429780304432, 'timestamp': '2025-09-10 02:27:33.707910', 'step': 5548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:33.738430', 'step': 5548, 'epoch': 3} {'type': 'loss', 'content': 0.0005150972865521908, 'timestamp': '2025-09-10 02:27:33.743746', 'step': 5549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:33.778426', 'step': 5549, 'epoch': 3} {'type': 'loss', 'content': 0.0003233766183257103, 'timestamp': '2025-09-10 02:27:33.785737', 'step': 5550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:33.819092', 'step': 5550, 'epoch': 3} {'type': 'loss', 'content': 0.00020095381478313357, 'timestamp': '2025-09-10 02:27:33.831112', 'step': 5551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:27:33.870353', 'step': 5551, 'epoch': 3} {'type': 'loss', 'content': 0.00017904266132973135, 'timestamp': '2025-09-10 02:27:33.907089', 'step': 5552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:33.941168', 'step': 5552, 'epoch': 3} {'type': 'loss', 'content': 3.078414738411084e-05, 'timestamp': '2025-09-10 02:27:33.943772', 'step': 5553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:33.976793', 'step': 5553, 'epoch': 3} {'type': 'loss', 'content': 0.0003823090228252113, 'timestamp': '2025-09-10 02:27:33.983643', 'step': 5554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:27:34.025232', 'step': 5554, 'epoch': 3} {'type': 'loss', 'content': 0.0005709293182007968, 'timestamp': '2025-09-10 02:27:34.042566', 'step': 5555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:34.074986', 'step': 5555, 'epoch': 3} {'type': 'loss', 'content': 0.00023254666302818805, 'timestamp': '2025-09-10 02:27:34.102934', 'step': 5556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:34.135109', 'step': 5556, 'epoch': 3} {'type': 'loss', 'content': 0.00022391592210624367, 'timestamp': '2025-09-10 02:27:34.137096', 'step': 5557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:34.170741', 'step': 5557, 'epoch': 3} {'type': 'loss', 'content': 0.015340792946517467, 'timestamp': '2025-09-10 02:27:34.178292', 'step': 5558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:27:34.219562', 'step': 5558, 'epoch': 3} {'type': 'loss', 'content': 0.001337612047791481, 'timestamp': '2025-09-10 02:27:34.235720', 'step': 5559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:27:34.272146', 'step': 5559, 'epoch': 3} {'type': 'loss', 'content': 0.0005941048148088157, 'timestamp': '2025-09-10 02:27:34.306749', 'step': 5560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:27:34.340343', 'step': 5560, 'epoch': 3} {'type': 'loss', 'content': 0.00025993268354795873, 'timestamp': '2025-09-10 02:27:34.353688', 'step': 5561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:34.385854', 'step': 5561, 'epoch': 3} {'type': 'loss', 'content': 0.000905154156498611, 'timestamp': '2025-09-10 02:27:34.389899', 'step': 5562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:34.428220', 'step': 5562, 'epoch': 3} {'type': 'loss', 'content': 0.0006523271440528333, 'timestamp': '2025-09-10 02:27:34.432679', 'step': 5563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:34.474553', 'step': 5563, 'epoch': 3} {'type': 'loss', 'content': 7.019279291853309e-05, 'timestamp': '2025-09-10 02:27:34.506256', 'step': 5564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:34.536315', 'step': 5564, 'epoch': 3} {'type': 'loss', 'content': 5.2420513384277e-05, 'timestamp': '2025-09-10 02:27:34.540790', 'step': 5565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:34.571997', 'step': 5565, 'epoch': 3} {'type': 'loss', 'content': 0.00020063482224941254, 'timestamp': '2025-09-10 02:27:34.581502', 'step': 5566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:27:34.632019', 'step': 5566, 'epoch': 3} {'type': 'loss', 'content': 0.0006023825262673199, 'timestamp': '2025-09-10 02:27:34.649384', 'step': 5567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:27:34.689284', 'step': 5567, 'epoch': 3} {'type': 'loss', 'content': 0.0001301374431932345, 'timestamp': '2025-09-10 02:27:34.726061', 'step': 5568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:34.758512', 'step': 5568, 'epoch': 3} {'type': 'loss', 'content': 0.000419637217419222, 'timestamp': '2025-09-10 02:27:34.762345', 'step': 5569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:34.793133', 'step': 5569, 'epoch': 3} {'type': 'loss', 'content': 0.00030135762062855065, 'timestamp': '2025-09-10 02:27:34.802810', 'step': 5570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:34.835555', 'step': 5570, 'epoch': 3} {'type': 'loss', 'content': 9.938400035025552e-05, 'timestamp': '2025-09-10 02:27:34.847463', 'step': 5571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:34.878126', 'step': 5571, 'epoch': 3} {'type': 'loss', 'content': 0.001446371665224433, 'timestamp': '2025-09-10 02:27:34.905980', 'step': 5572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:34.937476', 'step': 5572, 'epoch': 3} {'type': 'loss', 'content': 0.0001281161530641839, 'timestamp': '2025-09-10 02:27:34.942245', 'step': 5573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:27:34.976733', 'step': 5573, 'epoch': 3} {'type': 'loss', 'content': 0.00013364390179049224, 'timestamp': '2025-09-10 02:27:34.990743', 'step': 5574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:27:35.024991', 'step': 5574, 'epoch': 3} {'type': 'loss', 'content': 0.0001239602715941146, 'timestamp': '2025-09-10 02:27:35.038337', 'step': 5575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:35.069263', 'step': 5575, 'epoch': 3} {'type': 'loss', 'content': 0.00047443489893339574, 'timestamp': '2025-09-10 02:27:35.097251', 'step': 5576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:35.127534', 'step': 5576, 'epoch': 3} {'type': 'loss', 'content': 0.00024012614449020475, 'timestamp': '2025-09-10 02:27:35.130325', 'step': 5577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:35.160617', 'step': 5577, 'epoch': 3} {'type': 'loss', 'content': 0.0003875931433867663, 'timestamp': '2025-09-10 02:27:35.167435', 'step': 5578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:35.197741', 'step': 5578, 'epoch': 3} {'type': 'loss', 'content': 0.0003180662461090833, 'timestamp': '2025-09-10 02:27:35.208661', 'step': 5579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:35.239848', 'step': 5579, 'epoch': 3} {'type': 'loss', 'content': 0.00013746933836955577, 'timestamp': '2025-09-10 02:27:35.267537', 'step': 5580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:35.298635', 'step': 5580, 'epoch': 3} {'type': 'loss', 'content': 0.005690049845725298, 'timestamp': '2025-09-10 02:27:35.303999', 'step': 5581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:35.335409', 'step': 5581, 'epoch': 3} {'type': 'loss', 'content': 0.00014996285608503968, 'timestamp': '2025-09-10 02:27:35.342161', 'step': 5582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:35.374179', 'step': 5582, 'epoch': 3} {'type': 'loss', 'content': 0.00021600407490041107, 'timestamp': '2025-09-10 02:27:35.380733', 'step': 5583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:35.411851', 'step': 5583, 'epoch': 3} {'type': 'loss', 'content': 0.0007192405755631626, 'timestamp': '2025-09-10 02:27:35.436830', 'step': 5584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:35.467895', 'step': 5584, 'epoch': 3} {'type': 'loss', 'content': 0.0003458319406490773, 'timestamp': '2025-09-10 02:27:35.472981', 'step': 5585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:35.503975', 'step': 5585, 'epoch': 3} {'type': 'loss', 'content': 0.0002480298571754247, 'timestamp': '2025-09-10 02:27:35.506323', 'step': 5586, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:27:45.964600', 'step': 5586, 'epoch': 3} {'type': 'pplx', 'content': 21153895.963864572, 'timestamp': '2025-09-10 02:27:45.968772', 'step': 5586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:45.999647', 'step': 5586, 'epoch': 3} {'type': 'loss', 'content': 0.0005043831770308316, 'timestamp': '2025-09-10 02:27:46.005515', 'step': 5587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:27:46.044554', 'step': 5587, 'epoch': 3} {'type': 'loss', 'content': 0.00019294557569082826, 'timestamp': '2025-09-10 02:27:46.078716', 'step': 5588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:46.110185', 'step': 5588, 'epoch': 3} {'type': 'loss', 'content': 0.0006769891479052603, 'timestamp': '2025-09-10 02:27:46.114567', 'step': 5589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:46.145830', 'step': 5589, 'epoch': 3} {'type': 'loss', 'content': 0.00010750783985713497, 'timestamp': '2025-09-10 02:27:46.153069', 'step': 5590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:46.184008', 'step': 5590, 'epoch': 3} {'type': 'loss', 'content': 0.00029583461582660675, 'timestamp': '2025-09-10 02:27:46.188447', 'step': 5591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:46.218761', 'step': 5591, 'epoch': 3} {'type': 'loss', 'content': 0.0015220470959320664, 'timestamp': '2025-09-10 02:27:46.244206', 'step': 5592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:46.275648', 'step': 5592, 'epoch': 3} {'type': 'loss', 'content': 0.00018189029651694, 'timestamp': '2025-09-10 02:27:46.283544', 'step': 5593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:46.313685', 'step': 5593, 'epoch': 3} {'type': 'loss', 'content': 0.0017844110261648893, 'timestamp': '2025-09-10 02:27:46.320745', 'step': 5594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:46.352390', 'step': 5594, 'epoch': 3} {'type': 'loss', 'content': 0.002676573349162936, 'timestamp': '2025-09-10 02:27:46.364916', 'step': 5595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:46.396379', 'step': 5595, 'epoch': 3} {'type': 'loss', 'content': 0.0013415786670520902, 'timestamp': '2025-09-10 02:27:46.428107', 'step': 5596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:46.459638', 'step': 5596, 'epoch': 3} {'type': 'loss', 'content': 0.0009746703435666859, 'timestamp': '2025-09-10 02:27:46.467143', 'step': 5597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:46.498227', 'step': 5597, 'epoch': 3} {'type': 'loss', 'content': 0.0008030079188756645, 'timestamp': '2025-09-10 02:27:46.505002', 'step': 5598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:46.536683', 'step': 5598, 'epoch': 3} {'type': 'loss', 'content': 0.012600510381162167, 'timestamp': '2025-09-10 02:27:46.546818', 'step': 5599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:46.577027', 'step': 5599, 'epoch': 3} {'type': 'loss', 'content': 0.00023556490486953408, 'timestamp': '2025-09-10 02:27:46.605544', 'step': 5600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 784], 'flops': 23255845310656}, 'timestamp': '2025-09-10 02:27:46.668269', 'step': 5600, 'epoch': 3} {'type': 'loss', 'content': 0.002329291310161352, 'timestamp': '2025-09-10 02:27:46.695192', 'step': 5601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:46.727012', 'step': 5601, 'epoch': 3} {'type': 'loss', 'content': 0.0002381420199526474, 'timestamp': '2025-09-10 02:27:46.739574', 'step': 5602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:46.770291', 'step': 5602, 'epoch': 3} {'type': 'loss', 'content': 0.00032722530886530876, 'timestamp': '2025-09-10 02:27:46.774173', 'step': 5603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:46.805995', 'step': 5603, 'epoch': 3} {'type': 'loss', 'content': 0.00039383722469210625, 'timestamp': '2025-09-10 02:27:46.837715', 'step': 5604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:46.869559', 'step': 5604, 'epoch': 3} {'type': 'loss', 'content': 0.0016721284482628107, 'timestamp': '2025-09-10 02:27:46.877324', 'step': 5605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:27:46.917602', 'step': 5605, 'epoch': 3} {'type': 'loss', 'content': 0.0009820311097428203, 'timestamp': '2025-09-10 02:27:46.933729', 'step': 5606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:46.968616', 'step': 5606, 'epoch': 3} {'type': 'loss', 'content': 0.0013964021345600486, 'timestamp': '2025-09-10 02:27:46.971092', 'step': 5607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:47.001901', 'step': 5607, 'epoch': 3} {'type': 'loss', 'content': 0.00012165692896815017, 'timestamp': '2025-09-10 02:27:47.029468', 'step': 5608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:47.060590', 'step': 5608, 'epoch': 3} {'type': 'loss', 'content': 0.0014244894264265895, 'timestamp': '2025-09-10 02:27:47.065971', 'step': 5609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:47.097599', 'step': 5609, 'epoch': 3} {'type': 'loss', 'content': 3.5444008972262964e-05, 'timestamp': '2025-09-10 02:27:47.102060', 'step': 5610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:47.134406', 'step': 5610, 'epoch': 3} {'type': 'loss', 'content': 0.020029067993164062, 'timestamp': '2025-09-10 02:27:47.141398', 'step': 5611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:47.173807', 'step': 5611, 'epoch': 3} {'type': 'loss', 'content': 0.00018498908320907503, 'timestamp': '2025-09-10 02:27:47.198781', 'step': 5612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:47.230234', 'step': 5612, 'epoch': 3} {'type': 'loss', 'content': 0.00031777346157468855, 'timestamp': '2025-09-10 02:27:47.235598', 'step': 5613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:47.270740', 'step': 5613, 'epoch': 3} {'type': 'loss', 'content': 0.00015662026999052614, 'timestamp': '2025-09-10 02:27:47.278215', 'step': 5614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:47.309072', 'step': 5614, 'epoch': 3} {'type': 'loss', 'content': 0.0004629619943443686, 'timestamp': '2025-09-10 02:27:47.312933', 'step': 5615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:47.343858', 'step': 5615, 'epoch': 3} {'type': 'loss', 'content': 9.026808402268216e-05, 'timestamp': '2025-09-10 02:27:47.372453', 'step': 5616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:47.412614', 'step': 5616, 'epoch': 3} {'type': 'loss', 'content': 0.00044115257333032787, 'timestamp': '2025-09-10 02:27:47.417106', 'step': 5617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:27:47.459530', 'step': 5617, 'epoch': 3} {'type': 'loss', 'content': 0.00015083990001585335, 'timestamp': '2025-09-10 02:27:47.475170', 'step': 5618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:47.515880', 'step': 5618, 'epoch': 3} {'type': 'loss', 'content': 0.00014641489542555064, 'timestamp': '2025-09-10 02:27:47.525490', 'step': 5619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:47.558296', 'step': 5619, 'epoch': 3} {'type': 'loss', 'content': 0.002689136890694499, 'timestamp': '2025-09-10 02:27:47.583300', 'step': 5620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 14712978242368}, 'timestamp': '2025-09-10 02:27:47.632703', 'step': 5620, 'epoch': 3} {'type': 'loss', 'content': 0.00016491406131535769, 'timestamp': '2025-09-10 02:27:47.649985', 'step': 5621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:27:47.686849', 'step': 5621, 'epoch': 3} {'type': 'loss', 'content': 0.00010262165596941486, 'timestamp': '2025-09-10 02:27:47.700613', 'step': 5622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:47.730764', 'step': 5622, 'epoch': 3} {'type': 'loss', 'content': 0.0035222251899540424, 'timestamp': '2025-09-10 02:27:47.734893', 'step': 5623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:47.768301', 'step': 5623, 'epoch': 3} {'type': 'loss', 'content': 0.00014401556109078228, 'timestamp': '2025-09-10 02:27:47.796183', 'step': 5624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:47.829146', 'step': 5624, 'epoch': 3} {'type': 'loss', 'content': 9.734489867696539e-05, 'timestamp': '2025-09-10 02:27:47.834206', 'step': 5625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:47.868850', 'step': 5625, 'epoch': 3} {'type': 'loss', 'content': 0.0001005322701530531, 'timestamp': '2025-09-10 02:27:47.875667', 'step': 5626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:47.908700', 'step': 5626, 'epoch': 3} {'type': 'loss', 'content': 0.00047445675591006875, 'timestamp': '2025-09-10 02:27:47.912771', 'step': 5627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:47.946188', 'step': 5627, 'epoch': 3} {'type': 'loss', 'content': 0.01834898255765438, 'timestamp': '2025-09-10 02:27:47.974549', 'step': 5628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:48.006496', 'step': 5628, 'epoch': 3} {'type': 'loss', 'content': 0.011012358590960503, 'timestamp': '2025-09-10 02:27:48.011573', 'step': 5629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:48.045571', 'step': 5629, 'epoch': 3} {'type': 'loss', 'content': 5.657299334416166e-05, 'timestamp': '2025-09-10 02:27:48.049587', 'step': 5630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:48.081291', 'step': 5630, 'epoch': 3} {'type': 'loss', 'content': 7.444791117450222e-05, 'timestamp': '2025-09-10 02:27:48.085451', 'step': 5631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:48.116422', 'step': 5631, 'epoch': 3} {'type': 'loss', 'content': 0.0001294314133701846, 'timestamp': '2025-09-10 02:27:48.144885', 'step': 5632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:48.175511', 'step': 5632, 'epoch': 3} {'type': 'loss', 'content': 0.0005845736595802009, 'timestamp': '2025-09-10 02:27:48.181018', 'step': 5633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:48.212713', 'step': 5633, 'epoch': 3} {'type': 'loss', 'content': 0.0001259546697838232, 'timestamp': '2025-09-10 02:27:48.219610', 'step': 5634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:48.250841', 'step': 5634, 'epoch': 3} {'type': 'loss', 'content': 8.173291280400008e-05, 'timestamp': '2025-09-10 02:27:48.257649', 'step': 5635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:48.289711', 'step': 5635, 'epoch': 3} {'type': 'loss', 'content': 9.338084782939404e-05, 'timestamp': '2025-09-10 02:27:48.323187', 'step': 5636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:48.354729', 'step': 5636, 'epoch': 3} {'type': 'loss', 'content': 0.014086895622313023, 'timestamp': '2025-09-10 02:27:48.363501', 'step': 5637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:48.394692', 'step': 5637, 'epoch': 3} {'type': 'loss', 'content': 0.0006178818293847144, 'timestamp': '2025-09-10 02:27:48.399112', 'step': 5638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:48.430466', 'step': 5638, 'epoch': 3} {'type': 'loss', 'content': 0.003365145530551672, 'timestamp': '2025-09-10 02:27:48.440750', 'step': 5639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:27:48.475898', 'step': 5639, 'epoch': 3} {'type': 'loss', 'content': 0.00015990216343197972, 'timestamp': '2025-09-10 02:27:48.510610', 'step': 5640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:48.541734', 'step': 5640, 'epoch': 3} {'type': 'loss', 'content': 0.0005560553981922567, 'timestamp': '2025-09-10 02:27:48.546755', 'step': 5641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:48.578165', 'step': 5641, 'epoch': 3} {'type': 'loss', 'content': 0.0001505583932157606, 'timestamp': '2025-09-10 02:27:48.585776', 'step': 5642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:48.616419', 'step': 5642, 'epoch': 3} {'type': 'loss', 'content': 0.00036215136060491204, 'timestamp': '2025-09-10 02:27:48.626544', 'step': 5643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:48.658433', 'step': 5643, 'epoch': 3} {'type': 'loss', 'content': 0.00019817725114990026, 'timestamp': '2025-09-10 02:27:48.683629', 'step': 5644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:48.714705', 'step': 5644, 'epoch': 3} {'type': 'loss', 'content': 0.0002911986375693232, 'timestamp': '2025-09-10 02:27:48.720004', 'step': 5645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:48.751853', 'step': 5645, 'epoch': 3} {'type': 'loss', 'content': 0.0006733777699992061, 'timestamp': '2025-09-10 02:27:48.762610', 'step': 5646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:48.793476', 'step': 5646, 'epoch': 3} {'type': 'loss', 'content': 0.00010006874072132632, 'timestamp': '2025-09-10 02:27:48.797969', 'step': 5647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:48.829166', 'step': 5647, 'epoch': 3} {'type': 'loss', 'content': 0.00048814056208357215, 'timestamp': '2025-09-10 02:27:48.862325', 'step': 5648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:48.894618', 'step': 5648, 'epoch': 3} {'type': 'loss', 'content': 0.001697771018370986, 'timestamp': '2025-09-10 02:27:48.899692', 'step': 5649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:48.930035', 'step': 5649, 'epoch': 3} {'type': 'loss', 'content': 0.0001428636023774743, 'timestamp': '2025-09-10 02:27:48.934199', 'step': 5650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:48.965831', 'step': 5650, 'epoch': 3} {'type': 'loss', 'content': 0.000209279969567433, 'timestamp': '2025-09-10 02:27:48.976845', 'step': 5651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:49.007514', 'step': 5651, 'epoch': 3} {'type': 'loss', 'content': 0.00015839662228245288, 'timestamp': '2025-09-10 02:27:49.032626', 'step': 5652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:49.063881', 'step': 5652, 'epoch': 3} {'type': 'loss', 'content': 9.35560601647012e-05, 'timestamp': '2025-09-10 02:27:49.068742', 'step': 5653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:49.099818', 'step': 5653, 'epoch': 3} {'type': 'loss', 'content': 0.0006064533954486251, 'timestamp': '2025-09-10 02:27:49.107264', 'step': 5654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:27:49.145723', 'step': 5654, 'epoch': 3} {'type': 'loss', 'content': 0.0011858725920319557, 'timestamp': '2025-09-10 02:27:49.161628', 'step': 5655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:49.193563', 'step': 5655, 'epoch': 3} {'type': 'loss', 'content': 0.00020078910165466368, 'timestamp': '2025-09-10 02:27:49.222016', 'step': 5656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:49.258814', 'step': 5656, 'epoch': 3} {'type': 'loss', 'content': 0.0003024769830517471, 'timestamp': '2025-09-10 02:27:49.264152', 'step': 5657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:49.295019', 'step': 5657, 'epoch': 3} {'type': 'loss', 'content': 0.00010363813635194674, 'timestamp': '2025-09-10 02:27:49.299630', 'step': 5658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:49.330561', 'step': 5658, 'epoch': 3} {'type': 'loss', 'content': 0.000979832955636084, 'timestamp': '2025-09-10 02:27:49.337473', 'step': 5659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:49.368731', 'step': 5659, 'epoch': 3} {'type': 'loss', 'content': 0.00316768535412848, 'timestamp': '2025-09-10 02:27:49.397153', 'step': 5660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:49.428455', 'step': 5660, 'epoch': 3} {'type': 'loss', 'content': 0.000212435275898315, 'timestamp': '2025-09-10 02:27:49.433700', 'step': 5661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:49.464662', 'step': 5661, 'epoch': 3} {'type': 'loss', 'content': 0.0002012075565289706, 'timestamp': '2025-09-10 02:27:49.471690', 'step': 5662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:49.503263', 'step': 5662, 'epoch': 3} {'type': 'loss', 'content': 0.00011027476284652948, 'timestamp': '2025-09-10 02:27:49.510978', 'step': 5663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:49.542521', 'step': 5663, 'epoch': 3} {'type': 'loss', 'content': 0.0003467754868324846, 'timestamp': '2025-09-10 02:27:49.570507', 'step': 5664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:49.603015', 'step': 5664, 'epoch': 3} {'type': 'loss', 'content': 0.0003236646589357406, 'timestamp': '2025-09-10 02:27:49.608415', 'step': 5665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:49.640871', 'step': 5665, 'epoch': 3} {'type': 'loss', 'content': 0.00015571604308206588, 'timestamp': '2025-09-10 02:27:49.651335', 'step': 5666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:49.683553', 'step': 5666, 'epoch': 3} {'type': 'loss', 'content': 8.091597555903718e-05, 'timestamp': '2025-09-10 02:27:49.691003', 'step': 5667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:49.723344', 'step': 5667, 'epoch': 3} {'type': 'loss', 'content': 0.007872511632740498, 'timestamp': '2025-09-10 02:27:49.754583', 'step': 5668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:27:49.786086', 'step': 5668, 'epoch': 3} {'type': 'loss', 'content': 0.0007030696724541485, 'timestamp': '2025-09-10 02:27:49.798738', 'step': 5669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:49.831188', 'step': 5669, 'epoch': 3} {'type': 'loss', 'content': 0.0006685466505587101, 'timestamp': '2025-09-10 02:27:49.841817', 'step': 5670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:49.874810', 'step': 5670, 'epoch': 3} {'type': 'loss', 'content': 0.04719764366745949, 'timestamp': '2025-09-10 02:27:49.879227', 'step': 5671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:49.909984', 'step': 5671, 'epoch': 3} {'type': 'loss', 'content': 0.0005829405854456127, 'timestamp': '2025-09-10 02:27:49.935147', 'step': 5672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:49.966937', 'step': 5672, 'epoch': 3} {'type': 'loss', 'content': 0.00043701488175429404, 'timestamp': '2025-09-10 02:27:49.969613', 'step': 5673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:50.000908', 'step': 5673, 'epoch': 3} {'type': 'loss', 'content': 0.00024339115770999342, 'timestamp': '2025-09-10 02:27:50.005518', 'step': 5674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:50.036102', 'step': 5674, 'epoch': 3} {'type': 'loss', 'content': 0.00014959640975575894, 'timestamp': '2025-09-10 02:27:50.043834', 'step': 5675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:27:50.080322', 'step': 5675, 'epoch': 3} {'type': 'loss', 'content': 6.585565279237926e-05, 'timestamp': '2025-09-10 02:27:50.115216', 'step': 5676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:50.146329', 'step': 5676, 'epoch': 3} {'type': 'loss', 'content': 0.05050432309508324, 'timestamp': '2025-09-10 02:27:50.156092', 'step': 5677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:27:50.190885', 'step': 5677, 'epoch': 3} {'type': 'loss', 'content': 0.00037958953180350363, 'timestamp': '2025-09-10 02:27:50.204725', 'step': 5678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:50.235976', 'step': 5678, 'epoch': 3} {'type': 'loss', 'content': 0.00040227436693385243, 'timestamp': '2025-09-10 02:27:50.242748', 'step': 5679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:50.273801', 'step': 5679, 'epoch': 3} {'type': 'loss', 'content': 0.00012008142948616296, 'timestamp': '2025-09-10 02:27:50.298810', 'step': 5680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:50.329195', 'step': 5680, 'epoch': 3} {'type': 'loss', 'content': 9.119750757236034e-05, 'timestamp': '2025-09-10 02:27:50.333798', 'step': 5681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:50.365461', 'step': 5681, 'epoch': 3} {'type': 'loss', 'content': 0.0011381086660549045, 'timestamp': '2025-09-10 02:27:50.378048', 'step': 5682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 576], 'flops': 17085996872448}, 'timestamp': '2025-09-10 02:27:50.427147', 'step': 5682, 'epoch': 3} {'type': 'loss', 'content': 0.0008125408785417676, 'timestamp': '2025-09-10 02:27:50.446582', 'step': 5683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:50.478234', 'step': 5683, 'epoch': 3} {'type': 'loss', 'content': 0.00010114780889125541, 'timestamp': '2025-09-10 02:27:50.509371', 'step': 5684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:50.540848', 'step': 5684, 'epoch': 3} {'type': 'loss', 'content': 0.0005297398311085999, 'timestamp': '2025-09-10 02:27:50.543033', 'step': 5685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:50.574441', 'step': 5685, 'epoch': 3} {'type': 'loss', 'content': 0.0005330296116881073, 'timestamp': '2025-09-10 02:27:50.582221', 'step': 5686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:27:50.616873', 'step': 5686, 'epoch': 3} {'type': 'loss', 'content': 0.0010487495455890894, 'timestamp': '2025-09-10 02:27:50.630586', 'step': 5687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:50.661249', 'step': 5687, 'epoch': 3} {'type': 'loss', 'content': 0.00015214362065307796, 'timestamp': '2025-09-10 02:27:50.685189', 'step': 5688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:50.717537', 'step': 5688, 'epoch': 3} {'type': 'loss', 'content': 0.010265544056892395, 'timestamp': '2025-09-10 02:27:50.719956', 'step': 5689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:50.750598', 'step': 5689, 'epoch': 3} {'type': 'loss', 'content': 0.0020830826833844185, 'timestamp': '2025-09-10 02:27:50.757664', 'step': 5690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:50.789024', 'step': 5690, 'epoch': 3} {'type': 'loss', 'content': 0.0004475609748624265, 'timestamp': '2025-09-10 02:27:50.793326', 'step': 5691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:50.824013', 'step': 5691, 'epoch': 3} {'type': 'loss', 'content': 0.0002475904766470194, 'timestamp': '2025-09-10 02:27:50.847535', 'step': 5692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:50.879538', 'step': 5692, 'epoch': 3} {'type': 'loss', 'content': 0.0027441454585641623, 'timestamp': '2025-09-10 02:27:50.881916', 'step': 5693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:50.916408', 'step': 5693, 'epoch': 3} {'type': 'loss', 'content': 0.0001911252038553357, 'timestamp': '2025-09-10 02:27:50.926786', 'step': 5694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:50.958900', 'step': 5694, 'epoch': 3} {'type': 'loss', 'content': 5.5323111155303195e-05, 'timestamp': '2025-09-10 02:27:50.966470', 'step': 5695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:50.999357', 'step': 5695, 'epoch': 3} {'type': 'loss', 'content': 0.0004969104775227606, 'timestamp': '2025-09-10 02:27:51.031237', 'step': 5696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:51.063553', 'step': 5696, 'epoch': 3} {'type': 'loss', 'content': 0.03493015095591545, 'timestamp': '2025-09-10 02:27:51.065993', 'step': 5697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:51.097386', 'step': 5697, 'epoch': 3} {'type': 'loss', 'content': 8.232889376813546e-05, 'timestamp': '2025-09-10 02:27:51.104580', 'step': 5698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:51.135783', 'step': 5698, 'epoch': 3} {'type': 'loss', 'content': 0.0008442237740382552, 'timestamp': '2025-09-10 02:27:51.139307', 'step': 5699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:51.170109', 'step': 5699, 'epoch': 3} {'type': 'loss', 'content': 0.006859573069959879, 'timestamp': '2025-09-10 02:27:51.194266', 'step': 5700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:51.227350', 'step': 5700, 'epoch': 3} {'type': 'loss', 'content': 0.0002599300933070481, 'timestamp': '2025-09-10 02:27:51.232459', 'step': 5701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:27:51.266118', 'step': 5701, 'epoch': 3} {'type': 'loss', 'content': 0.004736356902867556, 'timestamp': '2025-09-10 02:27:51.279485', 'step': 5702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:51.311764', 'step': 5702, 'epoch': 3} {'type': 'loss', 'content': 0.0007690637721680105, 'timestamp': '2025-09-10 02:27:51.316233', 'step': 5703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:51.347562', 'step': 5703, 'epoch': 3} {'type': 'loss', 'content': 0.0204758383333683, 'timestamp': '2025-09-10 02:27:51.380594', 'step': 5704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:51.412550', 'step': 5704, 'epoch': 3} {'type': 'loss', 'content': 0.0011761164059862494, 'timestamp': '2025-09-10 02:27:51.416928', 'step': 5705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:51.448539', 'step': 5705, 'epoch': 3} {'type': 'loss', 'content': 5.793437594547868e-05, 'timestamp': '2025-09-10 02:27:51.455944', 'step': 5706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:51.487078', 'step': 5706, 'epoch': 3} {'type': 'loss', 'content': 8.091299969237298e-05, 'timestamp': '2025-09-10 02:27:51.493963', 'step': 5707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:51.526583', 'step': 5707, 'epoch': 3} {'type': 'loss', 'content': 0.03535769507288933, 'timestamp': '2025-09-10 02:27:51.551702', 'step': 5708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:51.583111', 'step': 5708, 'epoch': 3} {'type': 'loss', 'content': 0.0004072172741871327, 'timestamp': '2025-09-10 02:27:51.588498', 'step': 5709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:51.619214', 'step': 5709, 'epoch': 3} {'type': 'loss', 'content': 0.0003087377699557692, 'timestamp': '2025-09-10 02:27:51.627130', 'step': 5710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:51.658723', 'step': 5710, 'epoch': 3} {'type': 'loss', 'content': 0.006235843989998102, 'timestamp': '2025-09-10 02:27:51.670864', 'step': 5711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:51.702195', 'step': 5711, 'epoch': 3} {'type': 'loss', 'content': 7.547135464847088e-05, 'timestamp': '2025-09-10 02:27:51.730599', 'step': 5712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:51.762336', 'step': 5712, 'epoch': 3} {'type': 'loss', 'content': 0.0005502524436451495, 'timestamp': '2025-09-10 02:27:51.767362', 'step': 5713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:27:51.798998', 'step': 5713, 'epoch': 3} {'type': 'loss', 'content': 0.0001679424021858722, 'timestamp': '2025-09-10 02:27:51.811368', 'step': 5714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:51.842756', 'step': 5714, 'epoch': 3} {'type': 'loss', 'content': 0.0006052498356439173, 'timestamp': '2025-09-10 02:27:51.845260', 'step': 5715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:27:51.877035', 'step': 5715, 'epoch': 3} {'type': 'loss', 'content': 0.0018796491203829646, 'timestamp': '2025-09-10 02:27:51.910146', 'step': 5716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:51.940836', 'step': 5716, 'epoch': 3} {'type': 'loss', 'content': 0.015208103694021702, 'timestamp': '2025-09-10 02:27:51.943246', 'step': 5717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:51.974973', 'step': 5717, 'epoch': 3} {'type': 'loss', 'content': 0.0038770330138504505, 'timestamp': '2025-09-10 02:27:51.985251', 'step': 5718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:52.017433', 'step': 5718, 'epoch': 3} {'type': 'loss', 'content': 0.0001229040208272636, 'timestamp': '2025-09-10 02:27:52.027604', 'step': 5719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:52.058545', 'step': 5719, 'epoch': 3} {'type': 'loss', 'content': 0.0011433316394686699, 'timestamp': '2025-09-10 02:27:52.086539', 'step': 5720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:52.117962', 'step': 5720, 'epoch': 3} {'type': 'loss', 'content': 0.0003462762397248298, 'timestamp': '2025-09-10 02:27:52.120244', 'step': 5721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:52.152796', 'step': 5721, 'epoch': 3} {'type': 'loss', 'content': 0.000564678106456995, 'timestamp': '2025-09-10 02:27:52.159875', 'step': 5722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:27:52.192123', 'step': 5722, 'epoch': 3} {'type': 'loss', 'content': 0.0010591919999569654, 'timestamp': '2025-09-10 02:27:52.199658', 'step': 5723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:52.230817', 'step': 5723, 'epoch': 3} {'type': 'loss', 'content': 0.01734619028866291, 'timestamp': '2025-09-10 02:27:52.258566', 'step': 5724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:52.289646', 'step': 5724, 'epoch': 3} {'type': 'loss', 'content': 0.0001629464386496693, 'timestamp': '2025-09-10 02:27:52.297564', 'step': 5725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:52.330081', 'step': 5725, 'epoch': 3} {'type': 'loss', 'content': 3.870922591886483e-05, 'timestamp': '2025-09-10 02:27:52.333926', 'step': 5726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:52.365369', 'step': 5726, 'epoch': 3} {'type': 'loss', 'content': 0.018190907314419746, 'timestamp': '2025-09-10 02:27:52.369832', 'step': 5727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:27:52.400840', 'step': 5727, 'epoch': 3} {'type': 'loss', 'content': 2.9860497306799516e-05, 'timestamp': '2025-09-10 02:27:52.426196', 'step': 5728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:52.456878', 'step': 5728, 'epoch': 3} {'type': 'loss', 'content': 0.0004968225257471204, 'timestamp': '2025-09-10 02:27:52.465537', 'step': 5729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:27:52.496269', 'step': 5729, 'epoch': 3} {'type': 'loss', 'content': 0.01434040255844593, 'timestamp': '2025-09-10 02:27:52.504074', 'step': 5730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:52.537346', 'step': 5730, 'epoch': 3} {'type': 'loss', 'content': 0.005067458841949701, 'timestamp': '2025-09-10 02:27:52.544273', 'step': 5731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:27:52.576225', 'step': 5731, 'epoch': 3} {'type': 'loss', 'content': 9.439605491934344e-05, 'timestamp': '2025-09-10 02:27:52.608122', 'step': 5732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:27:52.645892', 'step': 5732, 'epoch': 3} {'type': 'loss', 'content': 0.003644505748525262, 'timestamp': '2025-09-10 02:27:52.661313', 'step': 5733, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:28:02.807246', 'step': 5733, 'epoch': 3} {'type': 'pplx', 'content': 22101991.669623252, 'timestamp': '2025-09-10 02:28:02.810174', 'step': 5733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:02.840101', 'step': 5733, 'epoch': 3} {'type': 'loss', 'content': 0.033737100660800934, 'timestamp': '2025-09-10 02:28:02.842373', 'step': 5734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:02.876069', 'step': 5734, 'epoch': 3} {'type': 'loss', 'content': 0.00036468225880526006, 'timestamp': '2025-09-10 02:28:02.878737', 'step': 5735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:02.910075', 'step': 5735, 'epoch': 3} {'type': 'loss', 'content': 0.009591284207999706, 'timestamp': '2025-09-10 02:28:02.937888', 'step': 5736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:02.969710', 'step': 5736, 'epoch': 3} {'type': 'loss', 'content': 0.0036816957872360945, 'timestamp': '2025-09-10 02:28:02.980055', 'step': 5737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:28:03.016287', 'step': 5737, 'epoch': 3} {'type': 'loss', 'content': 0.0011977337999269366, 'timestamp': '2025-09-10 02:28:03.030182', 'step': 5738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:28:03.069816', 'step': 5738, 'epoch': 3} {'type': 'loss', 'content': 0.0003067262005060911, 'timestamp': '2025-09-10 02:28:03.085708', 'step': 5739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:03.121082', 'step': 5739, 'epoch': 3} {'type': 'loss', 'content': 0.026271553710103035, 'timestamp': '2025-09-10 02:28:03.155396', 'step': 5740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:03.189088', 'step': 5740, 'epoch': 3} {'type': 'loss', 'content': 0.0015502488240599632, 'timestamp': '2025-09-10 02:28:03.198824', 'step': 5741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:28:03.235146', 'step': 5741, 'epoch': 3} {'type': 'loss', 'content': 0.00022286844614427537, 'timestamp': '2025-09-10 02:28:03.249150', 'step': 5742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:03.283744', 'step': 5742, 'epoch': 3} {'type': 'loss', 'content': 0.0005459203966893256, 'timestamp': '2025-09-10 02:28:03.294604', 'step': 5743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:03.329955', 'step': 5743, 'epoch': 3} {'type': 'loss', 'content': 0.0006617820472456515, 'timestamp': '2025-09-10 02:28:03.360954', 'step': 5744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:03.393855', 'step': 5744, 'epoch': 3} {'type': 'loss', 'content': 0.00014259156887419522, 'timestamp': '2025-09-10 02:28:03.396424', 'step': 5745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:03.434320', 'step': 5745, 'epoch': 3} {'type': 'loss', 'content': 0.004075972363352776, 'timestamp': '2025-09-10 02:28:03.437045', 'step': 5746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:03.469204', 'step': 5746, 'epoch': 3} {'type': 'loss', 'content': 0.0010417302837595344, 'timestamp': '2025-09-10 02:28:03.476114', 'step': 5747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:03.507167', 'step': 5747, 'epoch': 3} {'type': 'loss', 'content': 0.0031860729213804007, 'timestamp': '2025-09-10 02:28:03.531694', 'step': 5748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:03.562989', 'step': 5748, 'epoch': 3} {'type': 'loss', 'content': 6.74440452712588e-05, 'timestamp': '2025-09-10 02:28:03.568360', 'step': 5749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:03.599347', 'step': 5749, 'epoch': 3} {'type': 'loss', 'content': 0.0004242011927999556, 'timestamp': '2025-09-10 02:28:03.606428', 'step': 5750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:03.638907', 'step': 5750, 'epoch': 3} {'type': 'loss', 'content': 0.002124165650457144, 'timestamp': '2025-09-10 02:28:03.651497', 'step': 5751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:03.685467', 'step': 5751, 'epoch': 3} {'type': 'loss', 'content': 0.0002149190113414079, 'timestamp': '2025-09-10 02:28:03.710926', 'step': 5752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:03.743262', 'step': 5752, 'epoch': 3} {'type': 'loss', 'content': 0.003125338116660714, 'timestamp': '2025-09-10 02:28:03.745774', 'step': 5753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:03.780095', 'step': 5753, 'epoch': 3} {'type': 'loss', 'content': 0.010561124421656132, 'timestamp': '2025-09-10 02:28:03.791977', 'step': 5754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:28:03.832603', 'step': 5754, 'epoch': 3} {'type': 'loss', 'content': 0.0001346966892015189, 'timestamp': '2025-09-10 02:28:03.846651', 'step': 5755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:03.878073', 'step': 5755, 'epoch': 3} {'type': 'loss', 'content': 0.0008288529934361577, 'timestamp': '2025-09-10 02:28:03.909071', 'step': 5756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:03.939746', 'step': 5756, 'epoch': 3} {'type': 'loss', 'content': 0.022131487727165222, 'timestamp': '2025-09-10 02:28:03.944860', 'step': 5757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:03.977124', 'step': 5757, 'epoch': 3} {'type': 'loss', 'content': 0.00015002823784016073, 'timestamp': '2025-09-10 02:28:03.984507', 'step': 5758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:04.020408', 'step': 5758, 'epoch': 3} {'type': 'loss', 'content': 0.0003043616015929729, 'timestamp': '2025-09-10 02:28:04.032525', 'step': 5759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:28:04.072446', 'step': 5759, 'epoch': 3} {'type': 'loss', 'content': 5.715518636861816e-05, 'timestamp': '2025-09-10 02:28:04.109495', 'step': 5760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:04.141618', 'step': 5760, 'epoch': 3} {'type': 'loss', 'content': 0.00012244329263921827, 'timestamp': '2025-09-10 02:28:04.146827', 'step': 5761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:04.178768', 'step': 5761, 'epoch': 3} {'type': 'loss', 'content': 0.004466865211725235, 'timestamp': '2025-09-10 02:28:04.185715', 'step': 5762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:04.217142', 'step': 5762, 'epoch': 3} {'type': 'loss', 'content': 0.0006833565421402454, 'timestamp': '2025-09-10 02:28:04.229486', 'step': 5763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:04.260629', 'step': 5763, 'epoch': 3} {'type': 'loss', 'content': 0.0013124813558533788, 'timestamp': '2025-09-10 02:28:04.289011', 'step': 5764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:04.321369', 'step': 5764, 'epoch': 3} {'type': 'loss', 'content': 0.0013321618316695094, 'timestamp': '2025-09-10 02:28:04.328935', 'step': 5765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:04.363050', 'step': 5765, 'epoch': 3} {'type': 'loss', 'content': 8.689150854479522e-05, 'timestamp': '2025-09-10 02:28:04.376445', 'step': 5766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:04.408901', 'step': 5766, 'epoch': 3} {'type': 'loss', 'content': 0.0008448630687780678, 'timestamp': '2025-09-10 02:28:04.412863', 'step': 5767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:04.447011', 'step': 5767, 'epoch': 3} {'type': 'loss', 'content': 7.289824134204537e-05, 'timestamp': '2025-09-10 02:28:04.481249', 'step': 5768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:04.513451', 'step': 5768, 'epoch': 3} {'type': 'loss', 'content': 0.0007902790675871074, 'timestamp': '2025-09-10 02:28:04.521242', 'step': 5769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:04.552772', 'step': 5769, 'epoch': 3} {'type': 'loss', 'content': 0.018035726621747017, 'timestamp': '2025-09-10 02:28:04.560464', 'step': 5770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:04.594612', 'step': 5770, 'epoch': 3} {'type': 'loss', 'content': 0.0003475056146271527, 'timestamp': '2025-09-10 02:28:04.602399', 'step': 5771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:04.634735', 'step': 5771, 'epoch': 3} {'type': 'loss', 'content': 0.001296757603995502, 'timestamp': '2025-09-10 02:28:04.663414', 'step': 5772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:04.695104', 'step': 5772, 'epoch': 3} {'type': 'loss', 'content': 0.0012029794743284583, 'timestamp': '2025-09-10 02:28:04.700693', 'step': 5773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:04.731906', 'step': 5773, 'epoch': 3} {'type': 'loss', 'content': 0.002924522617831826, 'timestamp': '2025-09-10 02:28:04.739804', 'step': 5774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:04.771603', 'step': 5774, 'epoch': 3} {'type': 'loss', 'content': 0.00025918486062437296, 'timestamp': '2025-09-10 02:28:04.776217', 'step': 5775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:28:04.811165', 'step': 5775, 'epoch': 3} {'type': 'loss', 'content': 0.00013173665502108634, 'timestamp': '2025-09-10 02:28:04.845733', 'step': 5776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:04.877086', 'step': 5776, 'epoch': 3} {'type': 'loss', 'content': 0.01582178846001625, 'timestamp': '2025-09-10 02:28:04.884742', 'step': 5777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:28:04.927710', 'step': 5777, 'epoch': 3} {'type': 'loss', 'content': 0.0009278925135731697, 'timestamp': '2025-09-10 02:28:04.945062', 'step': 5778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:04.977713', 'step': 5778, 'epoch': 3} {'type': 'loss', 'content': 0.0010470326524227858, 'timestamp': '2025-09-10 02:28:04.984773', 'step': 5779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:05.016401', 'step': 5779, 'epoch': 3} {'type': 'loss', 'content': 0.0019801973830908537, 'timestamp': '2025-09-10 02:28:05.043954', 'step': 5780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:05.074622', 'step': 5780, 'epoch': 3} {'type': 'loss', 'content': 0.0018586774822324514, 'timestamp': '2025-09-10 02:28:05.079203', 'step': 5781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:05.109637', 'step': 5781, 'epoch': 3} {'type': 'loss', 'content': 0.00046877076965756714, 'timestamp': '2025-09-10 02:28:05.119815', 'step': 5782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:05.150944', 'step': 5782, 'epoch': 3} {'type': 'loss', 'content': 0.00047852486022748053, 'timestamp': '2025-09-10 02:28:05.158370', 'step': 5783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:05.190177', 'step': 5783, 'epoch': 3} {'type': 'loss', 'content': 0.00019953006994910538, 'timestamp': '2025-09-10 02:28:05.218090', 'step': 5784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:05.248152', 'step': 5784, 'epoch': 3} {'type': 'loss', 'content': 0.028778070583939552, 'timestamp': '2025-09-10 02:28:05.256231', 'step': 5785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:05.287974', 'step': 5785, 'epoch': 3} {'type': 'loss', 'content': 0.00035870965803042054, 'timestamp': '2025-09-10 02:28:05.295459', 'step': 5786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:05.327021', 'step': 5786, 'epoch': 3} {'type': 'loss', 'content': 0.00494037102907896, 'timestamp': '2025-09-10 02:28:05.334576', 'step': 5787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:05.366972', 'step': 5787, 'epoch': 3} {'type': 'loss', 'content': 0.0017796893371269107, 'timestamp': '2025-09-10 02:28:05.394417', 'step': 5788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:05.425926', 'step': 5788, 'epoch': 3} {'type': 'loss', 'content': 0.003134679514914751, 'timestamp': '2025-09-10 02:28:05.430593', 'step': 5789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:05.461991', 'step': 5789, 'epoch': 3} {'type': 'loss', 'content': 0.0004141936369705945, 'timestamp': '2025-09-10 02:28:05.466566', 'step': 5790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:05.498350', 'step': 5790, 'epoch': 3} {'type': 'loss', 'content': 0.000323984568240121, 'timestamp': '2025-09-10 02:28:05.501044', 'step': 5791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:05.532998', 'step': 5791, 'epoch': 3} {'type': 'loss', 'content': 0.0023834407329559326, 'timestamp': '2025-09-10 02:28:05.560963', 'step': 5792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:05.607438', 'step': 5792, 'epoch': 3} {'type': 'loss', 'content': 0.00019796183914877474, 'timestamp': '2025-09-10 02:28:05.612967', 'step': 5793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:05.643451', 'step': 5793, 'epoch': 3} {'type': 'loss', 'content': 0.002645928878337145, 'timestamp': '2025-09-10 02:28:05.651231', 'step': 5794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:05.681908', 'step': 5794, 'epoch': 3} {'type': 'loss', 'content': 0.0006678312201984227, 'timestamp': '2025-09-10 02:28:05.688973', 'step': 5795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:28:05.732261', 'step': 5795, 'epoch': 3} {'type': 'loss', 'content': 0.0002754285524133593, 'timestamp': '2025-09-10 02:28:05.766953', 'step': 5796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:05.798145', 'step': 5796, 'epoch': 3} {'type': 'loss', 'content': 0.0007207631133496761, 'timestamp': '2025-09-10 02:28:05.803576', 'step': 5797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:05.835472', 'step': 5797, 'epoch': 3} {'type': 'loss', 'content': 0.0006030822987668216, 'timestamp': '2025-09-10 02:28:05.842377', 'step': 5798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:05.873976', 'step': 5798, 'epoch': 3} {'type': 'loss', 'content': 0.00043905325583182275, 'timestamp': '2025-09-10 02:28:05.884375', 'step': 5799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:05.919505', 'step': 5799, 'epoch': 3} {'type': 'loss', 'content': 0.00043995765736326575, 'timestamp': '2025-09-10 02:28:05.947055', 'step': 5800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:05.979533', 'step': 5800, 'epoch': 3} {'type': 'loss', 'content': 0.0005144139868207276, 'timestamp': '2025-09-10 02:28:05.987903', 'step': 5801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:06.019258', 'step': 5801, 'epoch': 3} {'type': 'loss', 'content': 0.0005805970868095756, 'timestamp': '2025-09-10 02:28:06.029217', 'step': 5802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:06.061900', 'step': 5802, 'epoch': 3} {'type': 'loss', 'content': 0.004789031110703945, 'timestamp': '2025-09-10 02:28:06.069365', 'step': 5803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:06.100953', 'step': 5803, 'epoch': 3} {'type': 'loss', 'content': 0.00047223473666235805, 'timestamp': '2025-09-10 02:28:06.132849', 'step': 5804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:06.165408', 'step': 5804, 'epoch': 3} {'type': 'loss', 'content': 0.000832175777759403, 'timestamp': '2025-09-10 02:28:06.170104', 'step': 5805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:06.201542', 'step': 5805, 'epoch': 3} {'type': 'loss', 'content': 0.00040307757444679737, 'timestamp': '2025-09-10 02:28:06.212414', 'step': 5806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:06.243676', 'step': 5806, 'epoch': 3} {'type': 'loss', 'content': 0.0005037328810431063, 'timestamp': '2025-09-10 02:28:06.256030', 'step': 5807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:06.287141', 'step': 5807, 'epoch': 3} {'type': 'loss', 'content': 0.0005597766139544547, 'timestamp': '2025-09-10 02:28:06.314819', 'step': 5808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:06.346542', 'step': 5808, 'epoch': 3} {'type': 'loss', 'content': 0.008857275359332561, 'timestamp': '2025-09-10 02:28:06.354152', 'step': 5809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:06.387344', 'step': 5809, 'epoch': 3} {'type': 'loss', 'content': 0.0009536809520795941, 'timestamp': '2025-09-10 02:28:06.391070', 'step': 5810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:06.423975', 'step': 5810, 'epoch': 3} {'type': 'loss', 'content': 0.0008017036016099155, 'timestamp': '2025-09-10 02:28:06.430461', 'step': 5811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:06.463001', 'step': 5811, 'epoch': 3} {'type': 'loss', 'content': 0.0008804936660453677, 'timestamp': '2025-09-10 02:28:06.496465', 'step': 5812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:28:06.534358', 'step': 5812, 'epoch': 3} {'type': 'loss', 'content': 0.0013134771725162864, 'timestamp': '2025-09-10 02:28:06.549509', 'step': 5813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:06.581408', 'step': 5813, 'epoch': 3} {'type': 'loss', 'content': 0.0001699960557743907, 'timestamp': '2025-09-10 02:28:06.585313', 'step': 5814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:06.620356', 'step': 5814, 'epoch': 3} {'type': 'loss', 'content': 0.00022905482910573483, 'timestamp': '2025-09-10 02:28:06.627965', 'step': 5815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:06.659439', 'step': 5815, 'epoch': 3} {'type': 'loss', 'content': 0.005473580211400986, 'timestamp': '2025-09-10 02:28:06.684299', 'step': 5816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:06.717091', 'step': 5816, 'epoch': 3} {'type': 'loss', 'content': 0.00022384269686881453, 'timestamp': '2025-09-10 02:28:06.729757', 'step': 5817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:06.761616', 'step': 5817, 'epoch': 3} {'type': 'loss', 'content': 0.0034808197524398565, 'timestamp': '2025-09-10 02:28:06.771789', 'step': 5818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:06.804176', 'step': 5818, 'epoch': 3} {'type': 'loss', 'content': 0.0008100624545477331, 'timestamp': '2025-09-10 02:28:06.808494', 'step': 5819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:06.840180', 'step': 5819, 'epoch': 3} {'type': 'loss', 'content': 0.0004766239726450294, 'timestamp': '2025-09-10 02:28:06.865355', 'step': 5820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:28:06.898933', 'step': 5820, 'epoch': 3} {'type': 'loss', 'content': 0.00017594116798136383, 'timestamp': '2025-09-10 02:28:06.911961', 'step': 5821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:06.948075', 'step': 5821, 'epoch': 3} {'type': 'loss', 'content': 8.833243191475049e-05, 'timestamp': '2025-09-10 02:28:06.960014', 'step': 5822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:06.992918', 'step': 5822, 'epoch': 3} {'type': 'loss', 'content': 0.00015262920351233333, 'timestamp': '2025-09-10 02:28:06.999660', 'step': 5823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:07.034959', 'step': 5823, 'epoch': 3} {'type': 'loss', 'content': 0.00041047646664083004, 'timestamp': '2025-09-10 02:28:07.063560', 'step': 5824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:07.096517', 'step': 5824, 'epoch': 3} {'type': 'loss', 'content': 0.0002567381889093667, 'timestamp': '2025-09-10 02:28:07.098731', 'step': 5825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:07.132739', 'step': 5825, 'epoch': 3} {'type': 'loss', 'content': 0.04794417694211006, 'timestamp': '2025-09-10 02:28:07.136836', 'step': 5826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:07.168906', 'step': 5826, 'epoch': 3} {'type': 'loss', 'content': 0.0002849227748811245, 'timestamp': '2025-09-10 02:28:07.175809', 'step': 5827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:07.206278', 'step': 5827, 'epoch': 3} {'type': 'loss', 'content': 0.000525909592397511, 'timestamp': '2025-09-10 02:28:07.234310', 'step': 5828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:07.265525', 'step': 5828, 'epoch': 3} {'type': 'loss', 'content': 0.0005727821262553334, 'timestamp': '2025-09-10 02:28:07.270526', 'step': 5829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:07.308371', 'step': 5829, 'epoch': 3} {'type': 'loss', 'content': 0.00017900993407238275, 'timestamp': '2025-09-10 02:28:07.317078', 'step': 5830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:07.355057', 'step': 5830, 'epoch': 3} {'type': 'loss', 'content': 0.00022803548199590296, 'timestamp': '2025-09-10 02:28:07.359320', 'step': 5831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:07.390801', 'step': 5831, 'epoch': 3} {'type': 'loss', 'content': 0.00024710877914913, 'timestamp': '2025-09-10 02:28:07.416686', 'step': 5832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:07.447536', 'step': 5832, 'epoch': 3} {'type': 'loss', 'content': 0.00446285679936409, 'timestamp': '2025-09-10 02:28:07.450094', 'step': 5833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:07.480424', 'step': 5833, 'epoch': 3} {'type': 'loss', 'content': 0.0004049288108944893, 'timestamp': '2025-09-10 02:28:07.488080', 'step': 5834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:07.524052', 'step': 5834, 'epoch': 3} {'type': 'loss', 'content': 0.00038109347224235535, 'timestamp': '2025-09-10 02:28:07.531088', 'step': 5835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:07.563022', 'step': 5835, 'epoch': 3} {'type': 'loss', 'content': 0.00032000825740396976, 'timestamp': '2025-09-10 02:28:07.587896', 'step': 5836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:07.619266', 'step': 5836, 'epoch': 3} {'type': 'loss', 'content': 0.0012009877245873213, 'timestamp': '2025-09-10 02:28:07.624814', 'step': 5837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:07.655720', 'step': 5837, 'epoch': 3} {'type': 'loss', 'content': 0.0006187800318002701, 'timestamp': '2025-09-10 02:28:07.663362', 'step': 5838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:07.697095', 'step': 5838, 'epoch': 3} {'type': 'loss', 'content': 0.0020875423215329647, 'timestamp': '2025-09-10 02:28:07.710498', 'step': 5839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:07.741626', 'step': 5839, 'epoch': 3} {'type': 'loss', 'content': 0.0002056649245787412, 'timestamp': '2025-09-10 02:28:07.769542', 'step': 5840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:07.800908', 'step': 5840, 'epoch': 3} {'type': 'loss', 'content': 0.0004982929094694555, 'timestamp': '2025-09-10 02:28:07.803353', 'step': 5841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:07.835570', 'step': 5841, 'epoch': 3} {'type': 'loss', 'content': 0.000268876610789448, 'timestamp': '2025-09-10 02:28:07.842998', 'step': 5842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:07.876751', 'step': 5842, 'epoch': 3} {'type': 'loss', 'content': 0.0005106105236336589, 'timestamp': '2025-09-10 02:28:07.890146', 'step': 5843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:07.924353', 'step': 5843, 'epoch': 3} {'type': 'loss', 'content': 7.785356137901545e-05, 'timestamp': '2025-09-10 02:28:07.952639', 'step': 5844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:07.983801', 'step': 5844, 'epoch': 3} {'type': 'loss', 'content': 0.0004512739833444357, 'timestamp': '2025-09-10 02:28:07.991443', 'step': 5845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:08.023478', 'step': 5845, 'epoch': 3} {'type': 'loss', 'content': 0.0006680196383967996, 'timestamp': '2025-09-10 02:28:08.027553', 'step': 5846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:08.058692', 'step': 5846, 'epoch': 3} {'type': 'loss', 'content': 0.0003025097248610109, 'timestamp': '2025-09-10 02:28:08.065598', 'step': 5847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:08.097241', 'step': 5847, 'epoch': 3} {'type': 'loss', 'content': 0.005206712055951357, 'timestamp': '2025-09-10 02:28:08.125006', 'step': 5848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:08.156546', 'step': 5848, 'epoch': 3} {'type': 'loss', 'content': 0.008312045596539974, 'timestamp': '2025-09-10 02:28:08.161170', 'step': 5849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:08.192371', 'step': 5849, 'epoch': 3} {'type': 'loss', 'content': 0.0003203331143595278, 'timestamp': '2025-09-10 02:28:08.199841', 'step': 5850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:28:08.238100', 'step': 5850, 'epoch': 3} {'type': 'loss', 'content': 0.0007190610049292445, 'timestamp': '2025-09-10 02:28:08.253892', 'step': 5851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:08.285276', 'step': 5851, 'epoch': 3} {'type': 'loss', 'content': 0.0019516788888722658, 'timestamp': '2025-09-10 02:28:08.310723', 'step': 5852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:28:08.343465', 'step': 5852, 'epoch': 3} {'type': 'loss', 'content': 0.0022987746633589268, 'timestamp': '2025-09-10 02:28:08.356560', 'step': 5853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:08.387174', 'step': 5853, 'epoch': 3} {'type': 'loss', 'content': 0.00023405192769132555, 'timestamp': '2025-09-10 02:28:08.389924', 'step': 5854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:08.421570', 'step': 5854, 'epoch': 3} {'type': 'loss', 'content': 0.00021774417837150395, 'timestamp': '2025-09-10 02:28:08.429305', 'step': 5855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:08.461401', 'step': 5855, 'epoch': 3} {'type': 'loss', 'content': 0.00024362494878005236, 'timestamp': '2025-09-10 02:28:08.492667', 'step': 5856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:08.525393', 'step': 5856, 'epoch': 3} {'type': 'loss', 'content': 0.0006450935616157949, 'timestamp': '2025-09-10 02:28:08.530034', 'step': 5857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:08.561832', 'step': 5857, 'epoch': 3} {'type': 'loss', 'content': 0.0009278419311158359, 'timestamp': '2025-09-10 02:28:08.569270', 'step': 5858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:08.602237', 'step': 5858, 'epoch': 3} {'type': 'loss', 'content': 0.0007209957693703473, 'timestamp': '2025-09-10 02:28:08.608970', 'step': 5859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:08.639784', 'step': 5859, 'epoch': 3} {'type': 'loss', 'content': 5.901495387661271e-05, 'timestamp': '2025-09-10 02:28:08.663893', 'step': 5860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:08.697450', 'step': 5860, 'epoch': 3} {'type': 'loss', 'content': 0.000261887616943568, 'timestamp': '2025-09-10 02:28:08.701869', 'step': 5861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:08.748593', 'step': 5861, 'epoch': 3} {'type': 'loss', 'content': 0.002333016600459814, 'timestamp': '2025-09-10 02:28:08.753260', 'step': 5862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:08.785064', 'step': 5862, 'epoch': 3} {'type': 'loss', 'content': 0.00048499341937713325, 'timestamp': '2025-09-10 02:28:08.792648', 'step': 5863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:08.824162', 'step': 5863, 'epoch': 3} {'type': 'loss', 'content': 0.0001742523891152814, 'timestamp': '2025-09-10 02:28:08.852884', 'step': 5864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:08.884034', 'step': 5864, 'epoch': 3} {'type': 'loss', 'content': 0.00020447876886464655, 'timestamp': '2025-09-10 02:28:08.889211', 'step': 5865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:08.922986', 'step': 5865, 'epoch': 3} {'type': 'loss', 'content': 0.0033087453339248896, 'timestamp': '2025-09-10 02:28:08.935260', 'step': 5866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:08.965827', 'step': 5866, 'epoch': 3} {'type': 'loss', 'content': 0.000428111816290766, 'timestamp': '2025-09-10 02:28:08.972751', 'step': 5867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:09.004500', 'step': 5867, 'epoch': 3} {'type': 'loss', 'content': 0.00031185123953036964, 'timestamp': '2025-09-10 02:28:09.031969', 'step': 5868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:09.064488', 'step': 5868, 'epoch': 3} {'type': 'loss', 'content': 7.067446131259203e-05, 'timestamp': '2025-09-10 02:28:09.072320', 'step': 5869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:09.103897', 'step': 5869, 'epoch': 3} {'type': 'loss', 'content': 0.0012084973277524114, 'timestamp': '2025-09-10 02:28:09.111514', 'step': 5870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:09.148832', 'step': 5870, 'epoch': 3} {'type': 'loss', 'content': 0.0005114732775837183, 'timestamp': '2025-09-10 02:28:09.153362', 'step': 5871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:09.191671', 'step': 5871, 'epoch': 3} {'type': 'loss', 'content': 0.00019311138021294028, 'timestamp': '2025-09-10 02:28:09.219678', 'step': 5872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:09.259004', 'step': 5872, 'epoch': 3} {'type': 'loss', 'content': 0.00104613380972296, 'timestamp': '2025-09-10 02:28:09.264427', 'step': 5873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:09.300484', 'step': 5873, 'epoch': 3} {'type': 'loss', 'content': 6.62386228214018e-05, 'timestamp': '2025-09-10 02:28:09.308028', 'step': 5874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:09.340490', 'step': 5874, 'epoch': 3} {'type': 'loss', 'content': 9.228465205524117e-05, 'timestamp': '2025-09-10 02:28:09.350633', 'step': 5875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:09.382056', 'step': 5875, 'epoch': 3} {'type': 'loss', 'content': 7.762354653095827e-05, 'timestamp': '2025-09-10 02:28:09.410773', 'step': 5876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:09.441904', 'step': 5876, 'epoch': 3} {'type': 'loss', 'content': 0.0005466092843562365, 'timestamp': '2025-09-10 02:28:09.447342', 'step': 5877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:09.479585', 'step': 5877, 'epoch': 3} {'type': 'loss', 'content': 0.0009989795507863164, 'timestamp': '2025-09-10 02:28:09.483748', 'step': 5878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:09.514404', 'step': 5878, 'epoch': 3} {'type': 'loss', 'content': 7.646583253517747e-05, 'timestamp': '2025-09-10 02:28:09.521969', 'step': 5879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:09.552415', 'step': 5879, 'epoch': 3} {'type': 'loss', 'content': 0.0011240827152505517, 'timestamp': '2025-09-10 02:28:09.577800', 'step': 5880, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:28:19.889322', 'step': 5880, 'epoch': 3} {'type': 'pplx', 'content': 23481078.320965376, 'timestamp': '2025-09-10 02:28:19.892251', 'step': 5880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:19.922083', 'step': 5880, 'epoch': 3} {'type': 'loss', 'content': 0.00028165520052425563, 'timestamp': '2025-09-10 02:28:19.926300', 'step': 5881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:19.957969', 'step': 5881, 'epoch': 3} {'type': 'loss', 'content': 0.0006952984258532524, 'timestamp': '2025-09-10 02:28:19.967638', 'step': 5882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:28:20.007260', 'step': 5882, 'epoch': 3} {'type': 'loss', 'content': 0.0002651652612257749, 'timestamp': '2025-09-10 02:28:20.023204', 'step': 5883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:20.053550', 'step': 5883, 'epoch': 3} {'type': 'loss', 'content': 0.00028942085918970406, 'timestamp': '2025-09-10 02:28:20.078433', 'step': 5884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:20.109579', 'step': 5884, 'epoch': 3} {'type': 'loss', 'content': 5.7478438975522295e-05, 'timestamp': '2025-09-10 02:28:20.114673', 'step': 5885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:20.145848', 'step': 5885, 'epoch': 3} {'type': 'loss', 'content': 9.486764611210674e-05, 'timestamp': '2025-09-10 02:28:20.158042', 'step': 5886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:20.188926', 'step': 5886, 'epoch': 3} {'type': 'loss', 'content': 0.00015322092804126441, 'timestamp': '2025-09-10 02:28:20.199824', 'step': 5887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:20.231887', 'step': 5887, 'epoch': 3} {'type': 'loss', 'content': 0.0002798614732455462, 'timestamp': '2025-09-10 02:28:20.257063', 'step': 5888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:28:20.293618', 'step': 5888, 'epoch': 3} {'type': 'loss', 'content': 0.011071518063545227, 'timestamp': '2025-09-10 02:28:20.309306', 'step': 5889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:20.341402', 'step': 5889, 'epoch': 3} {'type': 'loss', 'content': 5.499214239534922e-05, 'timestamp': '2025-09-10 02:28:20.352272', 'step': 5890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:20.383929', 'step': 5890, 'epoch': 3} {'type': 'loss', 'content': 0.00014012886094860733, 'timestamp': '2025-09-10 02:28:20.396510', 'step': 5891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:20.427689', 'step': 5891, 'epoch': 3} {'type': 'loss', 'content': 0.0002186378842452541, 'timestamp': '2025-09-10 02:28:20.453131', 'step': 5892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:20.485108', 'step': 5892, 'epoch': 3} {'type': 'loss', 'content': 0.00012612577120307833, 'timestamp': '2025-09-10 02:28:20.490130', 'step': 5893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:20.521235', 'step': 5893, 'epoch': 3} {'type': 'loss', 'content': 0.0001257530675502494, 'timestamp': '2025-09-10 02:28:20.523900', 'step': 5894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:20.557348', 'step': 5894, 'epoch': 3} {'type': 'loss', 'content': 9.036817209562287e-05, 'timestamp': '2025-09-10 02:28:20.561710', 'step': 5895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 15187581968384}, 'timestamp': '2025-09-10 02:28:20.608903', 'step': 5895, 'epoch': 3} {'type': 'loss', 'content': 0.024240778759121895, 'timestamp': '2025-09-10 02:28:20.647471', 'step': 5896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:20.684847', 'step': 5896, 'epoch': 3} {'type': 'loss', 'content': 7.944705430418253e-05, 'timestamp': '2025-09-10 02:28:20.693630', 'step': 5897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:20.732919', 'step': 5897, 'epoch': 3} {'type': 'loss', 'content': 0.00011858268408104777, 'timestamp': '2025-09-10 02:28:20.736885', 'step': 5898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:20.770947', 'step': 5898, 'epoch': 3} {'type': 'loss', 'content': 0.002331020077690482, 'timestamp': '2025-09-10 02:28:20.777874', 'step': 5899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:20.813776', 'step': 5899, 'epoch': 3} {'type': 'loss', 'content': 0.00038511460297740996, 'timestamp': '2025-09-10 02:28:20.841646', 'step': 5900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:20.878522', 'step': 5900, 'epoch': 3} {'type': 'loss', 'content': 0.00011608708882704377, 'timestamp': '2025-09-10 02:28:20.886729', 'step': 5901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:20.917807', 'step': 5901, 'epoch': 3} {'type': 'loss', 'content': 0.002312576165422797, 'timestamp': '2025-09-10 02:28:20.920438', 'step': 5902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:20.958793', 'step': 5902, 'epoch': 3} {'type': 'loss', 'content': 0.002418461488559842, 'timestamp': '2025-09-10 02:28:20.965545', 'step': 5903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:20.997621', 'step': 5903, 'epoch': 3} {'type': 'loss', 'content': 0.0005505615263246, 'timestamp': '2025-09-10 02:28:21.025586', 'step': 5904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:21.058667', 'step': 5904, 'epoch': 3} {'type': 'loss', 'content': 0.00045085299643687904, 'timestamp': '2025-09-10 02:28:21.068386', 'step': 5905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:21.104652', 'step': 5905, 'epoch': 3} {'type': 'loss', 'content': 6.284094706643373e-05, 'timestamp': '2025-09-10 02:28:21.111493', 'step': 5906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:21.145812', 'step': 5906, 'epoch': 3} {'type': 'loss', 'content': 0.0002078805264318362, 'timestamp': '2025-09-10 02:28:21.159192', 'step': 5907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:21.199027', 'step': 5907, 'epoch': 3} {'type': 'loss', 'content': 0.03610233590006828, 'timestamp': '2025-09-10 02:28:21.227763', 'step': 5908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 624], 'flops': 18509808050496}, 'timestamp': '2025-09-10 02:28:21.276310', 'step': 5908, 'epoch': 3} {'type': 'loss', 'content': 0.0009849478956311941, 'timestamp': '2025-09-10 02:28:21.298098', 'step': 5909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:21.334078', 'step': 5909, 'epoch': 3} {'type': 'loss', 'content': 0.00027651750133372843, 'timestamp': '2025-09-10 02:28:21.344901', 'step': 5910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:21.378005', 'step': 5910, 'epoch': 3} {'type': 'loss', 'content': 0.0013139198999851942, 'timestamp': '2025-09-10 02:28:21.385789', 'step': 5911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:28:21.446447', 'step': 5911, 'epoch': 3} {'type': 'loss', 'content': 0.00023808155674487352, 'timestamp': '2025-09-10 02:28:21.482942', 'step': 5912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:21.530354', 'step': 5912, 'epoch': 3} {'type': 'loss', 'content': 0.000658250879496336, 'timestamp': '2025-09-10 02:28:21.536351', 'step': 5913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:21.568875', 'step': 5913, 'epoch': 3} {'type': 'loss', 'content': 0.00039495486998930573, 'timestamp': '2025-09-10 02:28:21.573402', 'step': 5914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:21.620099', 'step': 5914, 'epoch': 3} {'type': 'loss', 'content': 0.012653195299208164, 'timestamp': '2025-09-10 02:28:21.633469', 'step': 5915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:28:21.685057', 'step': 5915, 'epoch': 3} {'type': 'loss', 'content': 0.0027082362212240696, 'timestamp': '2025-09-10 02:28:21.723071', 'step': 5916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:28:21.773382', 'step': 5916, 'epoch': 3} {'type': 'loss', 'content': 0.0002292887365911156, 'timestamp': '2025-09-10 02:28:21.786732', 'step': 5917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:21.829150', 'step': 5917, 'epoch': 3} {'type': 'loss', 'content': 7.886863750172779e-05, 'timestamp': '2025-09-10 02:28:21.836156', 'step': 5918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:21.874513', 'step': 5918, 'epoch': 3} {'type': 'loss', 'content': 0.01980876363813877, 'timestamp': '2025-09-10 02:28:21.886855', 'step': 5919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:21.922524', 'step': 5919, 'epoch': 3} {'type': 'loss', 'content': 0.0007908547413535416, 'timestamp': '2025-09-10 02:28:21.954458', 'step': 5920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:21.990423', 'step': 5920, 'epoch': 3} {'type': 'loss', 'content': 0.00044035873725079, 'timestamp': '2025-09-10 02:28:21.998468', 'step': 5921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:22.031255', 'step': 5921, 'epoch': 3} {'type': 'loss', 'content': 0.00018272080342285335, 'timestamp': '2025-09-10 02:28:22.043192', 'step': 5922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:22.084988', 'step': 5922, 'epoch': 3} {'type': 'loss', 'content': 0.02625429444015026, 'timestamp': '2025-09-10 02:28:22.089699', 'step': 5923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:22.136258', 'step': 5923, 'epoch': 3} {'type': 'loss', 'content': 0.00017044544802047312, 'timestamp': '2025-09-10 02:28:22.164393', 'step': 5924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:22.200917', 'step': 5924, 'epoch': 3} {'type': 'loss', 'content': 0.00031413830583915114, 'timestamp': '2025-09-10 02:28:22.208548', 'step': 5925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:22.245116', 'step': 5925, 'epoch': 3} {'type': 'loss', 'content': 0.0015044523170217872, 'timestamp': '2025-09-10 02:28:22.251901', 'step': 5926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:22.284117', 'step': 5926, 'epoch': 3} {'type': 'loss', 'content': 8.876120409695432e-05, 'timestamp': '2025-09-10 02:28:22.291415', 'step': 5927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:22.322691', 'step': 5927, 'epoch': 3} {'type': 'loss', 'content': 0.0020055093336850405, 'timestamp': '2025-09-10 02:28:22.350437', 'step': 5928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:22.386625', 'step': 5928, 'epoch': 3} {'type': 'loss', 'content': 0.002611653646454215, 'timestamp': '2025-09-10 02:28:22.394203', 'step': 5929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:22.427632', 'step': 5929, 'epoch': 3} {'type': 'loss', 'content': 9.566867083776742e-05, 'timestamp': '2025-09-10 02:28:22.437509', 'step': 5930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:22.469507', 'step': 5930, 'epoch': 3} {'type': 'loss', 'content': 0.00027315152692608535, 'timestamp': '2025-09-10 02:28:22.473157', 'step': 5931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:22.506743', 'step': 5931, 'epoch': 3} {'type': 'loss', 'content': 0.00031735419179312885, 'timestamp': '2025-09-10 02:28:22.536072', 'step': 5932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:22.569249', 'step': 5932, 'epoch': 3} {'type': 'loss', 'content': 0.0007971972227096558, 'timestamp': '2025-09-10 02:28:22.574548', 'step': 5933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:22.609284', 'step': 5933, 'epoch': 3} {'type': 'loss', 'content': 0.0020833786111325026, 'timestamp': '2025-09-10 02:28:22.616835', 'step': 5934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:22.650154', 'step': 5934, 'epoch': 3} {'type': 'loss', 'content': 0.014346832409501076, 'timestamp': '2025-09-10 02:28:22.652891', 'step': 5935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:22.684922', 'step': 5935, 'epoch': 3} {'type': 'loss', 'content': 0.00011734214058378711, 'timestamp': '2025-09-10 02:28:22.709802', 'step': 5936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:22.742162', 'step': 5936, 'epoch': 3} {'type': 'loss', 'content': 0.0005824709078297019, 'timestamp': '2025-09-10 02:28:22.747280', 'step': 5937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:22.784761', 'step': 5937, 'epoch': 3} {'type': 'loss', 'content': 0.0002061406703433022, 'timestamp': '2025-09-10 02:28:22.797027', 'step': 5938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:22.834515', 'step': 5938, 'epoch': 3} {'type': 'loss', 'content': 0.000224357980187051, 'timestamp': '2025-09-10 02:28:22.838521', 'step': 5939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:22.874457', 'step': 5939, 'epoch': 3} {'type': 'loss', 'content': 0.0007039483753032982, 'timestamp': '2025-09-10 02:28:22.907412', 'step': 5940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:22.953965', 'step': 5940, 'epoch': 3} {'type': 'loss', 'content': 0.00016574481560382992, 'timestamp': '2025-09-10 02:28:22.958355', 'step': 5941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:22.990183', 'step': 5941, 'epoch': 3} {'type': 'loss', 'content': 0.0001796074939193204, 'timestamp': '2025-09-10 02:28:22.994743', 'step': 5942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:23.028030', 'step': 5942, 'epoch': 3} {'type': 'loss', 'content': 0.00036119503783993423, 'timestamp': '2025-09-10 02:28:23.037850', 'step': 5943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:23.072595', 'step': 5943, 'epoch': 3} {'type': 'loss', 'content': 9.575783769832924e-05, 'timestamp': '2025-09-10 02:28:23.100766', 'step': 5944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:23.132544', 'step': 5944, 'epoch': 3} {'type': 'loss', 'content': 0.00012964828056283295, 'timestamp': '2025-09-10 02:28:23.137289', 'step': 5945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:23.172581', 'step': 5945, 'epoch': 3} {'type': 'loss', 'content': 0.00028890607063658535, 'timestamp': '2025-09-10 02:28:23.184799', 'step': 5946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:23.219483', 'step': 5946, 'epoch': 3} {'type': 'loss', 'content': 0.0002000013628276065, 'timestamp': '2025-09-10 02:28:23.226663', 'step': 5947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:23.264835', 'step': 5947, 'epoch': 3} {'type': 'loss', 'content': 0.0011124643497169018, 'timestamp': '2025-09-10 02:28:23.289841', 'step': 5948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:23.320747', 'step': 5948, 'epoch': 3} {'type': 'loss', 'content': 0.000164168028277345, 'timestamp': '2025-09-10 02:28:23.325561', 'step': 5949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:23.357978', 'step': 5949, 'epoch': 3} {'type': 'loss', 'content': 0.00011684057244565338, 'timestamp': '2025-09-10 02:28:23.365046', 'step': 5950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:23.395277', 'step': 5950, 'epoch': 3} {'type': 'loss', 'content': 9.600551129551604e-05, 'timestamp': '2025-09-10 02:28:23.405621', 'step': 5951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:23.436969', 'step': 5951, 'epoch': 3} {'type': 'loss', 'content': 0.0005512385396286845, 'timestamp': '2025-09-10 02:28:23.470138', 'step': 5952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:23.502824', 'step': 5952, 'epoch': 3} {'type': 'loss', 'content': 7.65290460549295e-05, 'timestamp': '2025-09-10 02:28:23.511503', 'step': 5953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:23.559888', 'step': 5953, 'epoch': 3} {'type': 'loss', 'content': 0.00011048233864130452, 'timestamp': '2025-09-10 02:28:23.566592', 'step': 5954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:23.604284', 'step': 5954, 'epoch': 3} {'type': 'loss', 'content': 0.00010653473873389885, 'timestamp': '2025-09-10 02:28:23.612019', 'step': 5955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:23.652712', 'step': 5955, 'epoch': 3} {'type': 'loss', 'content': 4.3004063627449796e-05, 'timestamp': '2025-09-10 02:28:23.680551', 'step': 5956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:23.711182', 'step': 5956, 'epoch': 3} {'type': 'loss', 'content': 0.00041585671715438366, 'timestamp': '2025-09-10 02:28:23.715771', 'step': 5957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:23.747079', 'step': 5957, 'epoch': 3} {'type': 'loss', 'content': 0.0007976609631441534, 'timestamp': '2025-09-10 02:28:23.754701', 'step': 5958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:23.796867', 'step': 5958, 'epoch': 3} {'type': 'loss', 'content': 0.0035458316560834646, 'timestamp': '2025-09-10 02:28:23.803781', 'step': 5959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:23.835245', 'step': 5959, 'epoch': 3} {'type': 'loss', 'content': 0.00028361781733110547, 'timestamp': '2025-09-10 02:28:23.863166', 'step': 5960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:23.894704', 'step': 5960, 'epoch': 3} {'type': 'loss', 'content': 0.00024366001889575273, 'timestamp': '2025-09-10 02:28:23.899409', 'step': 5961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:23.930501', 'step': 5961, 'epoch': 3} {'type': 'loss', 'content': 0.00015509971126448363, 'timestamp': '2025-09-10 02:28:23.938150', 'step': 5962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:23.975810', 'step': 5962, 'epoch': 3} {'type': 'loss', 'content': 0.0010104191023856401, 'timestamp': '2025-09-10 02:28:23.979823', 'step': 5963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:28:24.029868', 'step': 5963, 'epoch': 3} {'type': 'loss', 'content': 0.00019829573284368962, 'timestamp': '2025-09-10 02:28:24.066464', 'step': 5964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:24.105446', 'step': 5964, 'epoch': 3} {'type': 'loss', 'content': 9.516144200460985e-05, 'timestamp': '2025-09-10 02:28:24.112665', 'step': 5965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:24.144653', 'step': 5965, 'epoch': 3} {'type': 'loss', 'content': 0.0004203191492706537, 'timestamp': '2025-09-10 02:28:24.151383', 'step': 5966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:24.185517', 'step': 5966, 'epoch': 3} {'type': 'loss', 'content': 0.013540414161980152, 'timestamp': '2025-09-10 02:28:24.197692', 'step': 5967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:24.235833', 'step': 5967, 'epoch': 3} {'type': 'loss', 'content': 0.0008812797605060041, 'timestamp': '2025-09-10 02:28:24.267088', 'step': 5968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:24.300779', 'step': 5968, 'epoch': 3} {'type': 'loss', 'content': 0.00030735571635887027, 'timestamp': '2025-09-10 02:28:24.322539', 'step': 5969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:24.361963', 'step': 5969, 'epoch': 3} {'type': 'loss', 'content': 7.28549639461562e-05, 'timestamp': '2025-09-10 02:28:24.368722', 'step': 5970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:24.402502', 'step': 5970, 'epoch': 3} {'type': 'loss', 'content': 0.0002149459905922413, 'timestamp': '2025-09-10 02:28:24.410284', 'step': 5971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:24.445827', 'step': 5971, 'epoch': 3} {'type': 'loss', 'content': 0.0018557047005742788, 'timestamp': '2025-09-10 02:28:24.472269', 'step': 5972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:24.507841', 'step': 5972, 'epoch': 3} {'type': 'loss', 'content': 0.00019703614816535264, 'timestamp': '2025-09-10 02:28:24.510013', 'step': 5973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:24.550589', 'step': 5973, 'epoch': 3} {'type': 'loss', 'content': 0.000475127570098266, 'timestamp': '2025-09-10 02:28:24.557956', 'step': 5974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:24.592600', 'step': 5974, 'epoch': 3} {'type': 'loss', 'content': 0.00028725885204039514, 'timestamp': '2025-09-10 02:28:24.605998', 'step': 5975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:28:24.645424', 'step': 5975, 'epoch': 3} {'type': 'loss', 'content': 0.0018704243702813983, 'timestamp': '2025-09-10 02:28:24.682214', 'step': 5976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:24.713623', 'step': 5976, 'epoch': 3} {'type': 'loss', 'content': 0.0017203286988660693, 'timestamp': '2025-09-10 02:28:24.721879', 'step': 5977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:24.755433', 'step': 5977, 'epoch': 3} {'type': 'loss', 'content': 0.00017131041386164725, 'timestamp': '2025-09-10 02:28:24.762523', 'step': 5978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:24.800064', 'step': 5978, 'epoch': 3} {'type': 'loss', 'content': 0.0002911267220042646, 'timestamp': '2025-09-10 02:28:24.806746', 'step': 5979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:24.839679', 'step': 5979, 'epoch': 3} {'type': 'loss', 'content': 0.001717909937724471, 'timestamp': '2025-09-10 02:28:24.871236', 'step': 5980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:24.904187', 'step': 5980, 'epoch': 3} {'type': 'loss', 'content': 0.00016485284140799195, 'timestamp': '2025-09-10 02:28:24.914518', 'step': 5981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:24.945807', 'step': 5981, 'epoch': 3} {'type': 'loss', 'content': 0.001138357212767005, 'timestamp': '2025-09-10 02:28:24.956643', 'step': 5982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:24.997444', 'step': 5982, 'epoch': 3} {'type': 'loss', 'content': 0.0005265086074359715, 'timestamp': '2025-09-10 02:28:25.009992', 'step': 5983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:25.045539', 'step': 5983, 'epoch': 3} {'type': 'loss', 'content': 0.0021492692176252604, 'timestamp': '2025-09-10 02:28:25.073142', 'step': 5984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:25.111871', 'step': 5984, 'epoch': 3} {'type': 'loss', 'content': 0.0007208751630969346, 'timestamp': '2025-09-10 02:28:25.120136', 'step': 5985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 15187581968384}, 'timestamp': '2025-09-10 02:28:25.163502', 'step': 5985, 'epoch': 3} {'type': 'loss', 'content': 0.0015368768945336342, 'timestamp': '2025-09-10 02:28:25.181153', 'step': 5986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:28:25.219981', 'step': 5986, 'epoch': 3} {'type': 'loss', 'content': 6.340054824249819e-05, 'timestamp': '2025-09-10 02:28:25.235639', 'step': 5987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:25.269918', 'step': 5987, 'epoch': 3} {'type': 'loss', 'content': 0.0003260863886680454, 'timestamp': '2025-09-10 02:28:25.294827', 'step': 5988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:25.332469', 'step': 5988, 'epoch': 3} {'type': 'loss', 'content': 0.00010763067984953523, 'timestamp': '2025-09-10 02:28:25.336705', 'step': 5989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:25.368246', 'step': 5989, 'epoch': 3} {'type': 'loss', 'content': 0.00016242521815001965, 'timestamp': '2025-09-10 02:28:25.372341', 'step': 5990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:25.412530', 'step': 5990, 'epoch': 3} {'type': 'loss', 'content': 0.00010946859401883557, 'timestamp': '2025-09-10 02:28:25.416923', 'step': 5991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:25.457416', 'step': 5991, 'epoch': 3} {'type': 'loss', 'content': 0.0001512065064162016, 'timestamp': '2025-09-10 02:28:25.488859', 'step': 5992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:25.526786', 'step': 5992, 'epoch': 3} {'type': 'loss', 'content': 0.00014350096171256155, 'timestamp': '2025-09-10 02:28:25.531078', 'step': 5993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:25.566643', 'step': 5993, 'epoch': 3} {'type': 'loss', 'content': 0.0001927161356434226, 'timestamp': '2025-09-10 02:28:25.570732', 'step': 5994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:25.602996', 'step': 5994, 'epoch': 3} {'type': 'loss', 'content': 0.0001801040634745732, 'timestamp': '2025-09-10 02:28:25.606374', 'step': 5995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:25.645401', 'step': 5995, 'epoch': 3} {'type': 'loss', 'content': 0.0004571221652440727, 'timestamp': '2025-09-10 02:28:25.675773', 'step': 5996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:28:25.711063', 'step': 5996, 'epoch': 3} {'type': 'loss', 'content': 0.0004833031562156975, 'timestamp': '2025-09-10 02:28:25.724210', 'step': 5997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:25.758522', 'step': 5997, 'epoch': 3} {'type': 'loss', 'content': 8.242072362918407e-05, 'timestamp': '2025-09-10 02:28:25.765098', 'step': 5998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:25.804980', 'step': 5998, 'epoch': 3} {'type': 'loss', 'content': 0.0008570431964471936, 'timestamp': '2025-09-10 02:28:25.811600', 'step': 5999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:25.844995', 'step': 5999, 'epoch': 3} {'type': 'loss', 'content': 0.0004085947584826499, 'timestamp': '2025-09-10 02:28:25.877238', 'step': 6000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 6000', 'timestamp': '2025-09-10 02:28:30.932741', 'step': 6000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:30.970145', 'step': 6000, 'epoch': 3} {'type': 'loss', 'content': 8.179119322448969e-05, 'timestamp': '2025-09-10 02:28:30.977508', 'step': 6001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:31.010049', 'step': 6001, 'epoch': 3} {'type': 'loss', 'content': 0.05771319940686226, 'timestamp': '2025-09-10 02:28:31.022181', 'step': 6002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:31.064762', 'step': 6002, 'epoch': 3} {'type': 'loss', 'content': 0.00022734318918082863, 'timestamp': '2025-09-10 02:28:31.077998', 'step': 6003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:31.112251', 'step': 6003, 'epoch': 3} {'type': 'loss', 'content': 0.00017205321637447923, 'timestamp': '2025-09-10 02:28:31.139626', 'step': 6004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:31.173783', 'step': 6004, 'epoch': 3} {'type': 'loss', 'content': 9.907536150421947e-05, 'timestamp': '2025-09-10 02:28:31.181115', 'step': 6005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:31.225328', 'step': 6005, 'epoch': 3} {'type': 'loss', 'content': 0.00015806824376340955, 'timestamp': '2025-09-10 02:28:31.228995', 'step': 6006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:31.263698', 'step': 6006, 'epoch': 3} {'type': 'loss', 'content': 0.005198474042117596, 'timestamp': '2025-09-10 02:28:31.273239', 'step': 6007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:31.306749', 'step': 6007, 'epoch': 3} {'type': 'loss', 'content': 0.0007690453785471618, 'timestamp': '2025-09-10 02:28:31.337819', 'step': 6008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:31.378571', 'step': 6008, 'epoch': 3} {'type': 'loss', 'content': 0.00019263003196101636, 'timestamp': '2025-09-10 02:28:31.382186', 'step': 6009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:31.431013', 'step': 6009, 'epoch': 3} {'type': 'loss', 'content': 0.044559430330991745, 'timestamp': '2025-09-10 02:28:31.438377', 'step': 6010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:28:31.488260', 'step': 6010, 'epoch': 3} {'type': 'loss', 'content': 0.00024100964947137982, 'timestamp': '2025-09-10 02:28:31.502012', 'step': 6011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:31.535576', 'step': 6011, 'epoch': 3} {'type': 'loss', 'content': 0.00010909455158980563, 'timestamp': '2025-09-10 02:28:31.566772', 'step': 6012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:31.603718', 'step': 6012, 'epoch': 3} {'type': 'loss', 'content': 0.0001878739712992683, 'timestamp': '2025-09-10 02:28:31.606235', 'step': 6013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:31.640132', 'step': 6013, 'epoch': 3} {'type': 'loss', 'content': 0.00019220814283471555, 'timestamp': '2025-09-10 02:28:31.643888', 'step': 6014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:31.677008', 'step': 6014, 'epoch': 3} {'type': 'loss', 'content': 0.00022207196161616594, 'timestamp': '2025-09-10 02:28:31.688506', 'step': 6015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:28:31.747631', 'step': 6015, 'epoch': 3} {'type': 'loss', 'content': 0.0015352964401245117, 'timestamp': '2025-09-10 02:28:31.785608', 'step': 6016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:31.829619', 'step': 6016, 'epoch': 3} {'type': 'loss', 'content': 0.0025681753177195787, 'timestamp': '2025-09-10 02:28:31.834054', 'step': 6017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:31.866562', 'step': 6017, 'epoch': 3} {'type': 'loss', 'content': 8.73608369147405e-05, 'timestamp': '2025-09-10 02:28:31.873272', 'step': 6018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:28:31.913411', 'step': 6018, 'epoch': 3} {'type': 'loss', 'content': 0.0003728985320776701, 'timestamp': '2025-09-10 02:28:31.929287', 'step': 6019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:31.967247', 'step': 6019, 'epoch': 3} {'type': 'loss', 'content': 0.00013485472300089896, 'timestamp': '2025-09-10 02:28:31.991264', 'step': 6020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:32.024606', 'step': 6020, 'epoch': 3} {'type': 'loss', 'content': 0.0008274485589936376, 'timestamp': '2025-09-10 02:28:32.031550', 'step': 6021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:32.066679', 'step': 6021, 'epoch': 3} {'type': 'loss', 'content': 0.00025299013941548765, 'timestamp': '2025-09-10 02:28:32.080063', 'step': 6022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:32.113274', 'step': 6022, 'epoch': 3} {'type': 'loss', 'content': 0.0001833633432397619, 'timestamp': '2025-09-10 02:28:32.120704', 'step': 6023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:32.160564', 'step': 6023, 'epoch': 3} {'type': 'loss', 'content': 7.197562081273645e-05, 'timestamp': '2025-09-10 02:28:32.186162', 'step': 6024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:32.219417', 'step': 6024, 'epoch': 3} {'type': 'loss', 'content': 0.00024802552070468664, 'timestamp': '2025-09-10 02:28:32.224460', 'step': 6025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:32.281475', 'step': 6025, 'epoch': 3} {'type': 'loss', 'content': 0.000184178032213822, 'timestamp': '2025-09-10 02:28:32.291170', 'step': 6026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:32.323527', 'step': 6026, 'epoch': 3} {'type': 'loss', 'content': 0.015562635846436024, 'timestamp': '2025-09-10 02:28:32.334078', 'step': 6027, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:28:42.928241', 'step': 6027, 'epoch': 3} {'type': 'pplx', 'content': 24656336.660595033, 'timestamp': '2025-09-10 02:28:42.931831', 'step': 6027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:42.964536', 'step': 6027, 'epoch': 3} {'type': 'loss', 'content': 0.0004599474195856601, 'timestamp': '2025-09-10 02:28:42.996271', 'step': 6028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:43.041688', 'step': 6028, 'epoch': 3} {'type': 'loss', 'content': 0.0006434383685700595, 'timestamp': '2025-09-10 02:28:43.052048', 'step': 6029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:43.099411', 'step': 6029, 'epoch': 3} {'type': 'loss', 'content': 0.0006290274322964251, 'timestamp': '2025-09-10 02:28:43.109959', 'step': 6030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:28:43.149728', 'step': 6030, 'epoch': 3} {'type': 'loss', 'content': 0.0011963268043473363, 'timestamp': '2025-09-10 02:28:43.163589', 'step': 6031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:28:43.211356', 'step': 6031, 'epoch': 3} {'type': 'loss', 'content': 0.00023444702674169093, 'timestamp': '2025-09-10 02:28:43.248438', 'step': 6032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:43.285507', 'step': 6032, 'epoch': 3} {'type': 'loss', 'content': 0.0004736782575491816, 'timestamp': '2025-09-10 02:28:43.290490', 'step': 6033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:43.330232', 'step': 6033, 'epoch': 3} {'type': 'loss', 'content': 0.0014104725560173392, 'timestamp': '2025-09-10 02:28:43.342832', 'step': 6034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:43.376100', 'step': 6034, 'epoch': 3} {'type': 'loss', 'content': 0.014761857688426971, 'timestamp': '2025-09-10 02:28:43.386327', 'step': 6035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:43.427628', 'step': 6035, 'epoch': 3} {'type': 'loss', 'content': 0.0003512998518999666, 'timestamp': '2025-09-10 02:28:43.457347', 'step': 6036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:43.505904', 'step': 6036, 'epoch': 3} {'type': 'loss', 'content': 0.0010449119145050645, 'timestamp': '2025-09-10 02:28:43.516285', 'step': 6037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:43.558636', 'step': 6037, 'epoch': 3} {'type': 'loss', 'content': 0.00028321417630650103, 'timestamp': '2025-09-10 02:28:43.572055', 'step': 6038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:43.605389', 'step': 6038, 'epoch': 3} {'type': 'loss', 'content': 0.004012010060250759, 'timestamp': '2025-09-10 02:28:43.617264', 'step': 6039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:43.652786', 'step': 6039, 'epoch': 3} {'type': 'loss', 'content': 0.0009143882198259234, 'timestamp': '2025-09-10 02:28:43.684521', 'step': 6040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:43.717671', 'step': 6040, 'epoch': 3} {'type': 'loss', 'content': 0.0016910507110878825, 'timestamp': '2025-09-10 02:28:43.730404', 'step': 6041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:43.765385', 'step': 6041, 'epoch': 3} {'type': 'loss', 'content': 0.007233879994601011, 'timestamp': '2025-09-10 02:28:43.772972', 'step': 6042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:43.805267', 'step': 6042, 'epoch': 3} {'type': 'loss', 'content': 0.0003337309753987938, 'timestamp': '2025-09-10 02:28:43.812135', 'step': 6043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:43.857425', 'step': 6043, 'epoch': 3} {'type': 'loss', 'content': 0.001774253905750811, 'timestamp': '2025-09-10 02:28:43.890870', 'step': 6044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:43.926016', 'step': 6044, 'epoch': 3} {'type': 'loss', 'content': 0.009199073538184166, 'timestamp': '2025-09-10 02:28:43.938684', 'step': 6045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:43.973721', 'step': 6045, 'epoch': 3} {'type': 'loss', 'content': 0.00048229689127765596, 'timestamp': '2025-09-10 02:28:43.978131', 'step': 6046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:44.010245', 'step': 6046, 'epoch': 3} {'type': 'loss', 'content': 0.0005172424134798348, 'timestamp': '2025-09-10 02:28:44.022269', 'step': 6047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:44.056102', 'step': 6047, 'epoch': 3} {'type': 'loss', 'content': 0.009047497995197773, 'timestamp': '2025-09-10 02:28:44.088040', 'step': 6048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:28:44.140827', 'step': 6048, 'epoch': 3} {'type': 'loss', 'content': 0.0007229651673696935, 'timestamp': '2025-09-10 02:28:44.153855', 'step': 6049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:44.205343', 'step': 6049, 'epoch': 3} {'type': 'loss', 'content': 0.0003836154646705836, 'timestamp': '2025-09-10 02:28:44.218696', 'step': 6050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:44.257047', 'step': 6050, 'epoch': 3} {'type': 'loss', 'content': 0.00197161384858191, 'timestamp': '2025-09-10 02:28:44.264179', 'step': 6051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:44.300441', 'step': 6051, 'epoch': 3} {'type': 'loss', 'content': 0.01630636677145958, 'timestamp': '2025-09-10 02:28:44.328369', 'step': 6052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:44.364877', 'step': 6052, 'epoch': 3} {'type': 'loss', 'content': 6.930591916898265e-05, 'timestamp': '2025-09-10 02:28:44.369520', 'step': 6053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:44.409417', 'step': 6053, 'epoch': 3} {'type': 'loss', 'content': 0.00038816872984170914, 'timestamp': '2025-09-10 02:28:44.421684', 'step': 6054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:44.456166', 'step': 6054, 'epoch': 3} {'type': 'loss', 'content': 0.0002050340553978458, 'timestamp': '2025-09-10 02:28:44.466586', 'step': 6055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:44.515033', 'step': 6055, 'epoch': 3} {'type': 'loss', 'content': 0.00029212163644842803, 'timestamp': '2025-09-10 02:28:44.546306', 'step': 6056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:44.591369', 'step': 6056, 'epoch': 3} {'type': 'loss', 'content': 0.00010645970905898139, 'timestamp': '2025-09-10 02:28:44.597363', 'step': 6057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:44.633439', 'step': 6057, 'epoch': 3} {'type': 'loss', 'content': 0.0017570939380675554, 'timestamp': '2025-09-10 02:28:44.645838', 'step': 6058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:44.682235', 'step': 6058, 'epoch': 3} {'type': 'loss', 'content': 0.00048702204367145896, 'timestamp': '2025-09-10 02:28:44.686480', 'step': 6059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:44.718445', 'step': 6059, 'epoch': 3} {'type': 'loss', 'content': 0.00012442604929674417, 'timestamp': '2025-09-10 02:28:44.750222', 'step': 6060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:44.785080', 'step': 6060, 'epoch': 3} {'type': 'loss', 'content': 0.0001539927179692313, 'timestamp': '2025-09-10 02:28:44.789559', 'step': 6061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:44.832176', 'step': 6061, 'epoch': 3} {'type': 'loss', 'content': 0.001589708379469812, 'timestamp': '2025-09-10 02:28:44.845571', 'step': 6062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:44.898514', 'step': 6062, 'epoch': 3} {'type': 'loss', 'content': 8.440674719167873e-05, 'timestamp': '2025-09-10 02:28:44.908974', 'step': 6063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:44.945327', 'step': 6063, 'epoch': 3} {'type': 'loss', 'content': 0.022429468110203743, 'timestamp': '2025-09-10 02:28:44.973142', 'step': 6064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:45.007027', 'step': 6064, 'epoch': 3} {'type': 'loss', 'content': 0.0002527502947486937, 'timestamp': '2025-09-10 02:28:45.017259', 'step': 6065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:28:45.056236', 'step': 6065, 'epoch': 3} {'type': 'loss', 'content': 0.001043917378410697, 'timestamp': '2025-09-10 02:28:45.070101', 'step': 6066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:45.102885', 'step': 6066, 'epoch': 3} {'type': 'loss', 'content': 0.00024156781728379428, 'timestamp': '2025-09-10 02:28:45.115456', 'step': 6067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:45.148757', 'step': 6067, 'epoch': 3} {'type': 'loss', 'content': 8.486958540743217e-05, 'timestamp': '2025-09-10 02:28:45.179988', 'step': 6068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:45.214966', 'step': 6068, 'epoch': 3} {'type': 'loss', 'content': 0.00023520128161180764, 'timestamp': '2025-09-10 02:28:45.219084', 'step': 6069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:45.255633', 'step': 6069, 'epoch': 3} {'type': 'loss', 'content': 0.0004728248168248683, 'timestamp': '2025-09-10 02:28:45.262485', 'step': 6070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:28:45.306098', 'step': 6070, 'epoch': 3} {'type': 'loss', 'content': 0.0002277484891237691, 'timestamp': '2025-09-10 02:28:45.319908', 'step': 6071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:45.354726', 'step': 6071, 'epoch': 3} {'type': 'loss', 'content': 5.23619819432497e-05, 'timestamp': '2025-09-10 02:28:45.385846', 'step': 6072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:45.424762', 'step': 6072, 'epoch': 3} {'type': 'loss', 'content': 0.00031708512688055634, 'timestamp': '2025-09-10 02:28:45.435251', 'step': 6073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:45.468626', 'step': 6073, 'epoch': 3} {'type': 'loss', 'content': 0.0004512048908509314, 'timestamp': '2025-09-10 02:28:45.475803', 'step': 6074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:45.510771', 'step': 6074, 'epoch': 3} {'type': 'loss', 'content': 0.00019133243768010288, 'timestamp': '2025-09-10 02:28:45.520955', 'step': 6075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:45.563511', 'step': 6075, 'epoch': 3} {'type': 'loss', 'content': 0.0029035231564193964, 'timestamp': '2025-09-10 02:28:45.596965', 'step': 6076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:28:45.630230', 'step': 6076, 'epoch': 3} {'type': 'loss', 'content': 0.0011966234305873513, 'timestamp': '2025-09-10 02:28:45.643233', 'step': 6077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:45.678334', 'step': 6077, 'epoch': 3} {'type': 'loss', 'content': 3.068596561206505e-05, 'timestamp': '2025-09-10 02:28:45.688687', 'step': 6078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:28:45.727400', 'step': 6078, 'epoch': 3} {'type': 'loss', 'content': 0.00018004176672548056, 'timestamp': '2025-09-10 02:28:45.741205', 'step': 6079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:45.792778', 'step': 6079, 'epoch': 3} {'type': 'loss', 'content': 0.00031091499840840697, 'timestamp': '2025-09-10 02:28:45.821496', 'step': 6080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:45.855903', 'step': 6080, 'epoch': 3} {'type': 'loss', 'content': 0.006474101450294256, 'timestamp': '2025-09-10 02:28:45.863049', 'step': 6081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:45.900461', 'step': 6081, 'epoch': 3} {'type': 'loss', 'content': 0.0032853742595762014, 'timestamp': '2025-09-10 02:28:45.910994', 'step': 6082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:45.947133', 'step': 6082, 'epoch': 3} {'type': 'loss', 'content': 5.7726305385585874e-05, 'timestamp': '2025-09-10 02:28:45.954240', 'step': 6083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:45.992236', 'step': 6083, 'epoch': 3} {'type': 'loss', 'content': 0.00022245707805268466, 'timestamp': '2025-09-10 02:28:46.026454', 'step': 6084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:46.060512', 'step': 6084, 'epoch': 3} {'type': 'loss', 'content': 0.000945181876886636, 'timestamp': '2025-09-10 02:28:46.068454', 'step': 6085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:28:46.103091', 'step': 6085, 'epoch': 3} {'type': 'loss', 'content': 0.004430218134075403, 'timestamp': '2025-09-10 02:28:46.116492', 'step': 6086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:46.151682', 'step': 6086, 'epoch': 3} {'type': 'loss', 'content': 4.371793329482898e-05, 'timestamp': '2025-09-10 02:28:46.156048', 'step': 6087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:46.191776', 'step': 6087, 'epoch': 3} {'type': 'loss', 'content': 0.00032512666075490415, 'timestamp': '2025-09-10 02:28:46.220288', 'step': 6088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:46.253403', 'step': 6088, 'epoch': 3} {'type': 'loss', 'content': 0.00032571834162808955, 'timestamp': '2025-09-10 02:28:46.258905', 'step': 6089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:46.294231', 'step': 6089, 'epoch': 3} {'type': 'loss', 'content': 0.016125816851854324, 'timestamp': '2025-09-10 02:28:46.305045', 'step': 6090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:46.336814', 'step': 6090, 'epoch': 3} {'type': 'loss', 'content': 2.697331365197897e-05, 'timestamp': '2025-09-10 02:28:46.339693', 'step': 6091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:46.378084', 'step': 6091, 'epoch': 3} {'type': 'loss', 'content': 0.0005751307471655309, 'timestamp': '2025-09-10 02:28:46.409404', 'step': 6092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:46.450036', 'step': 6092, 'epoch': 3} {'type': 'loss', 'content': 4.076838376931846e-05, 'timestamp': '2025-09-10 02:28:46.456193', 'step': 6093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 624], 'flops': 18509808050496}, 'timestamp': '2025-09-10 02:28:46.508832', 'step': 6093, 'epoch': 3} {'type': 'loss', 'content': 0.0009186511742882431, 'timestamp': '2025-09-10 02:28:46.530588', 'step': 6094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:46.562802', 'step': 6094, 'epoch': 3} {'type': 'loss', 'content': 0.006315763108432293, 'timestamp': '2025-09-10 02:28:46.569897', 'step': 6095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:46.600861', 'step': 6095, 'epoch': 3} {'type': 'loss', 'content': 0.00013419199967756867, 'timestamp': '2025-09-10 02:28:46.625650', 'step': 6096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:28:46.665746', 'step': 6096, 'epoch': 3} {'type': 'loss', 'content': 0.0002742501674219966, 'timestamp': '2025-09-10 02:28:46.682715', 'step': 6097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:46.720741', 'step': 6097, 'epoch': 3} {'type': 'loss', 'content': 0.0033741388469934464, 'timestamp': '2025-09-10 02:28:46.733329', 'step': 6098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:46.764850', 'step': 6098, 'epoch': 3} {'type': 'loss', 'content': 3.133829522994347e-05, 'timestamp': '2025-09-10 02:28:46.771679', 'step': 6099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:46.811405', 'step': 6099, 'epoch': 3} {'type': 'loss', 'content': 4.3357093090889975e-05, 'timestamp': '2025-09-10 02:28:46.835866', 'step': 6100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:46.875977', 'step': 6100, 'epoch': 3} {'type': 'loss', 'content': 0.00026234795222990215, 'timestamp': '2025-09-10 02:28:46.884317', 'step': 6101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:46.923251', 'step': 6101, 'epoch': 3} {'type': 'loss', 'content': 0.0012434854870662093, 'timestamp': '2025-09-10 02:28:46.927769', 'step': 6102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:46.960500', 'step': 6102, 'epoch': 3} {'type': 'loss', 'content': 0.021313535049557686, 'timestamp': '2025-09-10 02:28:46.967469', 'step': 6103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:47.000997', 'step': 6103, 'epoch': 3} {'type': 'loss', 'content': 0.003981561399996281, 'timestamp': '2025-09-10 02:28:47.028737', 'step': 6104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:47.065770', 'step': 6104, 'epoch': 3} {'type': 'loss', 'content': 0.00014674547128379345, 'timestamp': '2025-09-10 02:28:47.074269', 'step': 6105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:47.109901', 'step': 6105, 'epoch': 3} {'type': 'loss', 'content': 0.0017406666884198785, 'timestamp': '2025-09-10 02:28:47.112275', 'step': 6106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:47.142972', 'step': 6106, 'epoch': 3} {'type': 'loss', 'content': 0.0001538008509669453, 'timestamp': '2025-09-10 02:28:47.150025', 'step': 6107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:47.184350', 'step': 6107, 'epoch': 3} {'type': 'loss', 'content': 0.0009951989632099867, 'timestamp': '2025-09-10 02:28:47.217782', 'step': 6108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:28:47.257856', 'step': 6108, 'epoch': 3} {'type': 'loss', 'content': 0.0021144095808267593, 'timestamp': '2025-09-10 02:28:47.273336', 'step': 6109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:47.307354', 'step': 6109, 'epoch': 3} {'type': 'loss', 'content': 0.0008319019107148051, 'timestamp': '2025-09-10 02:28:47.317527', 'step': 6110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:47.353515', 'step': 6110, 'epoch': 3} {'type': 'loss', 'content': 0.00018621633353177458, 'timestamp': '2025-09-10 02:28:47.358189', 'step': 6111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:47.398595', 'step': 6111, 'epoch': 3} {'type': 'loss', 'content': 0.03902193531394005, 'timestamp': '2025-09-10 02:28:47.426998', 'step': 6112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:47.458338', 'step': 6112, 'epoch': 3} {'type': 'loss', 'content': 5.721451816498302e-05, 'timestamp': '2025-09-10 02:28:47.460447', 'step': 6113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:47.491411', 'step': 6113, 'epoch': 3} {'type': 'loss', 'content': 0.000635522126685828, 'timestamp': '2025-09-10 02:28:47.497979', 'step': 6114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:47.532982', 'step': 6114, 'epoch': 3} {'type': 'loss', 'content': 0.0002452080079820007, 'timestamp': '2025-09-10 02:28:47.540095', 'step': 6115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:47.574851', 'step': 6115, 'epoch': 3} {'type': 'loss', 'content': 0.0011523573193699121, 'timestamp': '2025-09-10 02:28:47.603452', 'step': 6116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:47.650342', 'step': 6116, 'epoch': 3} {'type': 'loss', 'content': 0.00015113291738089174, 'timestamp': '2025-09-10 02:28:47.658203', 'step': 6117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:47.701167', 'step': 6117, 'epoch': 3} {'type': 'loss', 'content': 0.0004939243663102388, 'timestamp': '2025-09-10 02:28:47.709590', 'step': 6118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 18035204324480}, 'timestamp': '2025-09-10 02:28:47.775156', 'step': 6118, 'epoch': 3} {'type': 'loss', 'content': 0.02475116029381752, 'timestamp': '2025-09-10 02:28:47.796645', 'step': 6119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:47.827951', 'step': 6119, 'epoch': 3} {'type': 'loss', 'content': 3.918137008440681e-05, 'timestamp': '2025-09-10 02:28:47.853222', 'step': 6120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:47.885164', 'step': 6120, 'epoch': 3} {'type': 'loss', 'content': 3.542963168001734e-05, 'timestamp': '2025-09-10 02:28:47.887509', 'step': 6121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:47.917812', 'step': 6121, 'epoch': 3} {'type': 'loss', 'content': 0.046116914600133896, 'timestamp': '2025-09-10 02:28:47.922199', 'step': 6122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:47.954445', 'step': 6122, 'epoch': 3} {'type': 'loss', 'content': 0.0024645677767693996, 'timestamp': '2025-09-10 02:28:47.962071', 'step': 6123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:47.992822', 'step': 6123, 'epoch': 3} {'type': 'loss', 'content': 0.0003803297586273402, 'timestamp': '2025-09-10 02:28:48.018057', 'step': 6124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:48.056437', 'step': 6124, 'epoch': 3} {'type': 'loss', 'content': 0.00047259125858545303, 'timestamp': '2025-09-10 02:28:48.061808', 'step': 6125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:48.093197', 'step': 6125, 'epoch': 3} {'type': 'loss', 'content': 0.00037615117616951466, 'timestamp': '2025-09-10 02:28:48.097671', 'step': 6126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:48.129051', 'step': 6126, 'epoch': 3} {'type': 'loss', 'content': 0.0004323399916756898, 'timestamp': '2025-09-10 02:28:48.135875', 'step': 6127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:48.172723', 'step': 6127, 'epoch': 3} {'type': 'loss', 'content': 0.0437922365963459, 'timestamp': '2025-09-10 02:28:48.201023', 'step': 6128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:48.235490', 'step': 6128, 'epoch': 3} {'type': 'loss', 'content': 0.000819290173240006, 'timestamp': '2025-09-10 02:28:48.240886', 'step': 6129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:48.280162', 'step': 6129, 'epoch': 3} {'type': 'loss', 'content': 0.00019732918008230627, 'timestamp': '2025-09-10 02:28:48.292529', 'step': 6130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:48.323914', 'step': 6130, 'epoch': 3} {'type': 'loss', 'content': 0.0008840580121614039, 'timestamp': '2025-09-10 02:28:48.331410', 'step': 6131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:48.361984', 'step': 6131, 'epoch': 3} {'type': 'loss', 'content': 0.019107328727841377, 'timestamp': '2025-09-10 02:28:48.389830', 'step': 6132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:48.421969', 'step': 6132, 'epoch': 3} {'type': 'loss', 'content': 8.087460446404293e-05, 'timestamp': '2025-09-10 02:28:48.430257', 'step': 6133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:48.462010', 'step': 6133, 'epoch': 3} {'type': 'loss', 'content': 0.008161481469869614, 'timestamp': '2025-09-10 02:28:48.469397', 'step': 6134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:48.500501', 'step': 6134, 'epoch': 3} {'type': 'loss', 'content': 0.04065088555216789, 'timestamp': '2025-09-10 02:28:48.508237', 'step': 6135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:48.539182', 'step': 6135, 'epoch': 3} {'type': 'loss', 'content': 0.001969260396435857, 'timestamp': '2025-09-10 02:28:48.567465', 'step': 6136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:48.601560', 'step': 6136, 'epoch': 3} {'type': 'loss', 'content': 0.0010869913967326283, 'timestamp': '2025-09-10 02:28:48.606358', 'step': 6137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:48.637725', 'step': 6137, 'epoch': 3} {'type': 'loss', 'content': 0.0005913428612984717, 'timestamp': '2025-09-10 02:28:48.642348', 'step': 6138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:48.675095', 'step': 6138, 'epoch': 3} {'type': 'loss', 'content': 6.689595466013998e-05, 'timestamp': '2025-09-10 02:28:48.682623', 'step': 6139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:48.714074', 'step': 6139, 'epoch': 3} {'type': 'loss', 'content': 0.005156568717211485, 'timestamp': '2025-09-10 02:28:48.742463', 'step': 6140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:28:48.774908', 'step': 6140, 'epoch': 3} {'type': 'loss', 'content': 0.00014441793609876186, 'timestamp': '2025-09-10 02:28:48.784687', 'step': 6141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:48.818569', 'step': 6141, 'epoch': 3} {'type': 'loss', 'content': 0.0004379312158562243, 'timestamp': '2025-09-10 02:28:48.826274', 'step': 6142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:48.859648', 'step': 6142, 'epoch': 3} {'type': 'loss', 'content': 0.012073171325027943, 'timestamp': '2025-09-10 02:28:48.866500', 'step': 6143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:48.904904', 'step': 6143, 'epoch': 3} {'type': 'loss', 'content': 0.0002663929190021008, 'timestamp': '2025-09-10 02:28:48.932951', 'step': 6144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 19459015502528}, 'timestamp': '2025-09-10 02:28:48.986342', 'step': 6144, 'epoch': 3} {'type': 'loss', 'content': 0.0007393588311970234, 'timestamp': '2025-09-10 02:28:49.010022', 'step': 6145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:49.041968', 'step': 6145, 'epoch': 3} {'type': 'loss', 'content': 0.00043535669101402164, 'timestamp': '2025-09-10 02:28:49.048697', 'step': 6146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:49.080271', 'step': 6146, 'epoch': 3} {'type': 'loss', 'content': 0.002159666968509555, 'timestamp': '2025-09-10 02:28:49.087247', 'step': 6147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:49.117573', 'step': 6147, 'epoch': 3} {'type': 'loss', 'content': 0.001196373486891389, 'timestamp': '2025-09-10 02:28:49.142819', 'step': 6148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:49.173591', 'step': 6148, 'epoch': 3} {'type': 'loss', 'content': 0.02430625446140766, 'timestamp': '2025-09-10 02:28:49.182109', 'step': 6149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:49.213236', 'step': 6149, 'epoch': 3} {'type': 'loss', 'content': 0.001197171164676547, 'timestamp': '2025-09-10 02:28:49.223428', 'step': 6150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:49.254356', 'step': 6150, 'epoch': 3} {'type': 'loss', 'content': 0.00013094481255393475, 'timestamp': '2025-09-10 02:28:49.258537', 'step': 6151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:49.288607', 'step': 6151, 'epoch': 3} {'type': 'loss', 'content': 0.0014958838000893593, 'timestamp': '2025-09-10 02:28:49.312699', 'step': 6152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:49.344110', 'step': 6152, 'epoch': 3} {'type': 'loss', 'content': 0.00031426880741491914, 'timestamp': '2025-09-10 02:28:49.348821', 'step': 6153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:49.379500', 'step': 6153, 'epoch': 3} {'type': 'loss', 'content': 0.0012687371345236897, 'timestamp': '2025-09-10 02:28:49.389987', 'step': 6154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:49.421111', 'step': 6154, 'epoch': 3} {'type': 'loss', 'content': 0.00014976828242652118, 'timestamp': '2025-09-10 02:28:49.432014', 'step': 6155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:49.464403', 'step': 6155, 'epoch': 3} {'type': 'loss', 'content': 0.0032478817738592625, 'timestamp': '2025-09-10 02:28:49.492166', 'step': 6156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:49.523071', 'step': 6156, 'epoch': 3} {'type': 'loss', 'content': 0.0014056127984076738, 'timestamp': '2025-09-10 02:28:49.528168', 'step': 6157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:28:49.559237', 'step': 6157, 'epoch': 3} {'type': 'loss', 'content': 0.0010582717368379235, 'timestamp': '2025-09-10 02:28:49.567068', 'step': 6158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:49.599764', 'step': 6158, 'epoch': 3} {'type': 'loss', 'content': 0.00013609221787191927, 'timestamp': '2025-09-10 02:28:49.606908', 'step': 6159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:49.639359', 'step': 6159, 'epoch': 3} {'type': 'loss', 'content': 0.0004430489207152277, 'timestamp': '2025-09-10 02:28:49.667476', 'step': 6160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:49.699751', 'step': 6160, 'epoch': 3} {'type': 'loss', 'content': 0.001818513497710228, 'timestamp': '2025-09-10 02:28:49.707627', 'step': 6161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:28:49.747955', 'step': 6161, 'epoch': 3} {'type': 'loss', 'content': 0.00037555742892436683, 'timestamp': '2025-09-10 02:28:49.763907', 'step': 6162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:49.796138', 'step': 6162, 'epoch': 3} {'type': 'loss', 'content': 0.0007206489099189639, 'timestamp': '2025-09-10 02:28:49.808649', 'step': 6163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:49.839676', 'step': 6163, 'epoch': 3} {'type': 'loss', 'content': 0.0002763103402685374, 'timestamp': '2025-09-10 02:28:49.870777', 'step': 6164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:28:49.902544', 'step': 6164, 'epoch': 3} {'type': 'loss', 'content': 0.0009474234539084136, 'timestamp': '2025-09-10 02:28:49.907330', 'step': 6165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:28:49.939488', 'step': 6165, 'epoch': 3} {'type': 'loss', 'content': 0.0009898262796923518, 'timestamp': '2025-09-10 02:28:49.949704', 'step': 6166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:49.980929', 'step': 6166, 'epoch': 3} {'type': 'loss', 'content': 0.000859494844917208, 'timestamp': '2025-09-10 02:28:49.988378', 'step': 6167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:28:50.020605', 'step': 6167, 'epoch': 3} {'type': 'loss', 'content': 0.0009193348814733326, 'timestamp': '2025-09-10 02:28:50.053933', 'step': 6168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:50.093804', 'step': 6168, 'epoch': 3} {'type': 'loss', 'content': 0.004276874475181103, 'timestamp': '2025-09-10 02:28:50.102501', 'step': 6169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:50.136920', 'step': 6169, 'epoch': 3} {'type': 'loss', 'content': 0.00454701716080308, 'timestamp': '2025-09-10 02:28:50.144309', 'step': 6170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:28:50.175129', 'step': 6170, 'epoch': 3} {'type': 'loss', 'content': 0.0004608361341524869, 'timestamp': '2025-09-10 02:28:50.186036', 'step': 6171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:28:50.220449', 'step': 6171, 'epoch': 3} {'type': 'loss', 'content': 0.000643234234303236, 'timestamp': '2025-09-10 02:28:50.248738', 'step': 6172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:28:50.281343', 'step': 6172, 'epoch': 3} {'type': 'loss', 'content': 0.001443680957891047, 'timestamp': '2025-09-10 02:28:50.285965', 'step': 6173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:50.318501', 'step': 6173, 'epoch': 3} {'type': 'loss', 'content': 0.007593140471726656, 'timestamp': '2025-09-10 02:28:50.322577', 'step': 6174, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:29:00.483350', 'step': 6174, 'epoch': 3} {'type': 'pplx', 'content': 23762090.47420289, 'timestamp': '2025-09-10 02:29:00.489527', 'step': 6174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:00.522250', 'step': 6174, 'epoch': 3} {'type': 'loss', 'content': 0.0009576130541972816, 'timestamp': '2025-09-10 02:29:00.528475', 'step': 6175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:00.565354', 'step': 6175, 'epoch': 3} {'type': 'loss', 'content': 0.006735025439411402, 'timestamp': '2025-09-10 02:29:00.593441', 'step': 6176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:00.627618', 'step': 6176, 'epoch': 3} {'type': 'loss', 'content': 0.005353983025997877, 'timestamp': '2025-09-10 02:29:00.635299', 'step': 6177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:00.669053', 'step': 6177, 'epoch': 3} {'type': 'loss', 'content': 0.004146324936300516, 'timestamp': '2025-09-10 02:29:00.673169', 'step': 6178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:00.709091', 'step': 6178, 'epoch': 3} {'type': 'loss', 'content': 0.0026420990470796824, 'timestamp': '2025-09-10 02:29:00.714523', 'step': 6179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:29:00.758352', 'step': 6179, 'epoch': 3} {'type': 'loss', 'content': 0.005170282907783985, 'timestamp': '2025-09-10 02:29:00.796546', 'step': 6180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:29:00.836181', 'step': 6180, 'epoch': 3} {'type': 'loss', 'content': 0.003013778477907181, 'timestamp': '2025-09-10 02:29:00.851358', 'step': 6181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:00.886755', 'step': 6181, 'epoch': 3} {'type': 'loss', 'content': 0.001864836667664349, 'timestamp': '2025-09-10 02:29:00.897162', 'step': 6182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:00.935035', 'step': 6182, 'epoch': 3} {'type': 'loss', 'content': 0.0007107080891728401, 'timestamp': '2025-09-10 02:29:00.946950', 'step': 6183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:29:00.991051', 'step': 6183, 'epoch': 3} {'type': 'loss', 'content': 0.0008580170688219368, 'timestamp': '2025-09-10 02:29:01.028951', 'step': 6184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:01.064026', 'step': 6184, 'epoch': 3} {'type': 'loss', 'content': 0.007555230520665646, 'timestamp': '2025-09-10 02:29:01.068065', 'step': 6185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 13289167064320}, 'timestamp': '2025-09-10 02:29:01.111953', 'step': 6185, 'epoch': 3} {'type': 'loss', 'content': 0.0021088081412017345, 'timestamp': '2025-09-10 02:29:01.128296', 'step': 6186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:01.162621', 'step': 6186, 'epoch': 3} {'type': 'loss', 'content': 0.0008245863718912005, 'timestamp': '2025-09-10 02:29:01.169216', 'step': 6187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:01.202455', 'step': 6187, 'epoch': 3} {'type': 'loss', 'content': 0.007418735884130001, 'timestamp': '2025-09-10 02:29:01.233410', 'step': 6188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:01.266922', 'step': 6188, 'epoch': 3} {'type': 'loss', 'content': 0.00027580276946537197, 'timestamp': '2025-09-10 02:29:01.275668', 'step': 6189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:01.309893', 'step': 6189, 'epoch': 3} {'type': 'loss', 'content': 0.0014374948805198073, 'timestamp': '2025-09-10 02:29:01.320644', 'step': 6190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:01.353527', 'step': 6190, 'epoch': 3} {'type': 'loss', 'content': 0.0003637855697888881, 'timestamp': '2025-09-10 02:29:01.357634', 'step': 6191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:01.390955', 'step': 6191, 'epoch': 3} {'type': 'loss', 'content': 0.003992599435150623, 'timestamp': '2025-09-10 02:29:01.423392', 'step': 6192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:01.457871', 'step': 6192, 'epoch': 3} {'type': 'loss', 'content': 4.350113886175677e-05, 'timestamp': '2025-09-10 02:29:01.461906', 'step': 6193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:01.496563', 'step': 6193, 'epoch': 3} {'type': 'loss', 'content': 0.0022764094173908234, 'timestamp': '2025-09-10 02:29:01.507981', 'step': 6194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:01.541893', 'step': 6194, 'epoch': 3} {'type': 'loss', 'content': 0.012956062331795692, 'timestamp': '2025-09-10 02:29:01.551989', 'step': 6195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:01.586391', 'step': 6195, 'epoch': 3} {'type': 'loss', 'content': 0.000641929917037487, 'timestamp': '2025-09-10 02:29:01.617664', 'step': 6196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:01.651653', 'step': 6196, 'epoch': 3} {'type': 'loss', 'content': 0.0020424830727279186, 'timestamp': '2025-09-10 02:29:01.661198', 'step': 6197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:01.693355', 'step': 6197, 'epoch': 3} {'type': 'loss', 'content': 0.0005121281137689948, 'timestamp': '2025-09-10 02:29:01.699969', 'step': 6198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:01.732326', 'step': 6198, 'epoch': 3} {'type': 'loss', 'content': 0.0009792317869141698, 'timestamp': '2025-09-10 02:29:01.736720', 'step': 6199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:01.770056', 'step': 6199, 'epoch': 3} {'type': 'loss', 'content': 0.0007432979182340205, 'timestamp': '2025-09-10 02:29:01.795525', 'step': 6200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:01.830760', 'step': 6200, 'epoch': 3} {'type': 'loss', 'content': 0.0006474620313383639, 'timestamp': '2025-09-10 02:29:01.835262', 'step': 6201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:01.868661', 'step': 6201, 'epoch': 3} {'type': 'loss', 'content': 0.0018694715108722448, 'timestamp': '2025-09-10 02:29:01.873579', 'step': 6202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:01.904865', 'step': 6202, 'epoch': 3} {'type': 'loss', 'content': 0.001604403369128704, 'timestamp': '2025-09-10 02:29:01.909029', 'step': 6203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:29:01.954060', 'step': 6203, 'epoch': 3} {'type': 'loss', 'content': 3.979109533247538e-05, 'timestamp': '2025-09-10 02:29:01.988965', 'step': 6204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:02.020146', 'step': 6204, 'epoch': 3} {'type': 'loss', 'content': 0.006631503812968731, 'timestamp': '2025-09-10 02:29:02.023430', 'step': 6205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:02.057417', 'step': 6205, 'epoch': 3} {'type': 'loss', 'content': 0.003117796266451478, 'timestamp': '2025-09-10 02:29:02.064614', 'step': 6206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:02.098793', 'step': 6206, 'epoch': 3} {'type': 'loss', 'content': 0.004380426835268736, 'timestamp': '2025-09-10 02:29:02.108753', 'step': 6207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:02.141675', 'step': 6207, 'epoch': 3} {'type': 'loss', 'content': 0.016728242859244347, 'timestamp': '2025-09-10 02:29:02.170150', 'step': 6208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:02.199959', 'step': 6208, 'epoch': 3} {'type': 'loss', 'content': 0.0002014095662161708, 'timestamp': '2025-09-10 02:29:02.202448', 'step': 6209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:02.238990', 'step': 6209, 'epoch': 3} {'type': 'loss', 'content': 0.0007832984556443989, 'timestamp': '2025-09-10 02:29:02.248922', 'step': 6210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:02.283798', 'step': 6210, 'epoch': 3} {'type': 'loss', 'content': 0.0007357418653555214, 'timestamp': '2025-09-10 02:29:02.294566', 'step': 6211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:02.329738', 'step': 6211, 'epoch': 3} {'type': 'loss', 'content': 0.02407163567841053, 'timestamp': '2025-09-10 02:29:02.369183', 'step': 6212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:02.402828', 'step': 6212, 'epoch': 3} {'type': 'loss', 'content': 8.155436808010563e-05, 'timestamp': '2025-09-10 02:29:02.407820', 'step': 6213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:02.448483', 'step': 6213, 'epoch': 3} {'type': 'loss', 'content': 0.0003762414853554219, 'timestamp': '2025-09-10 02:29:02.453924', 'step': 6214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:02.492975', 'step': 6214, 'epoch': 3} {'type': 'loss', 'content': 0.001163300359621644, 'timestamp': '2025-09-10 02:29:02.500308', 'step': 6215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:02.540491', 'step': 6215, 'epoch': 3} {'type': 'loss', 'content': 0.011203320696949959, 'timestamp': '2025-09-10 02:29:02.571357', 'step': 6216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:02.606934', 'step': 6216, 'epoch': 3} {'type': 'loss', 'content': 0.0006556047010235488, 'timestamp': '2025-09-10 02:29:02.611843', 'step': 6217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:29:02.656645', 'step': 6217, 'epoch': 3} {'type': 'loss', 'content': 0.0007842601626180112, 'timestamp': '2025-09-10 02:29:02.674001', 'step': 6218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:02.707892', 'step': 6218, 'epoch': 3} {'type': 'loss', 'content': 0.0004893920267932117, 'timestamp': '2025-09-10 02:29:02.714573', 'step': 6219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:02.750107', 'step': 6219, 'epoch': 3} {'type': 'loss', 'content': 0.0008870299207046628, 'timestamp': '2025-09-10 02:29:02.778290', 'step': 6220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:02.810971', 'step': 6220, 'epoch': 3} {'type': 'loss', 'content': 0.0022531996946781874, 'timestamp': '2025-09-10 02:29:02.815289', 'step': 6221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:29:02.858836', 'step': 6221, 'epoch': 3} {'type': 'loss', 'content': 0.0007312820525839925, 'timestamp': '2025-09-10 02:29:02.874928', 'step': 6222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:02.912150', 'step': 6222, 'epoch': 3} {'type': 'loss', 'content': 0.0002016266662394628, 'timestamp': '2025-09-10 02:29:02.922542', 'step': 6223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:02.958404', 'step': 6223, 'epoch': 3} {'type': 'loss', 'content': 0.0006459229625761509, 'timestamp': '2025-09-10 02:29:02.986941', 'step': 6224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:03.019933', 'step': 6224, 'epoch': 3} {'type': 'loss', 'content': 0.0004273521772120148, 'timestamp': '2025-09-10 02:29:03.024404', 'step': 6225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:03.059004', 'step': 6225, 'epoch': 3} {'type': 'loss', 'content': 0.00010626261064317077, 'timestamp': '2025-09-10 02:29:03.065725', 'step': 6226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:03.101442', 'step': 6226, 'epoch': 3} {'type': 'loss', 'content': 0.0011228015646338463, 'timestamp': '2025-09-10 02:29:03.108686', 'step': 6227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:03.147917', 'step': 6227, 'epoch': 3} {'type': 'loss', 'content': 0.0005814795149490237, 'timestamp': '2025-09-10 02:29:03.176118', 'step': 6228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:03.212972', 'step': 6228, 'epoch': 3} {'type': 'loss', 'content': 1.321633408224443e-05, 'timestamp': '2025-09-10 02:29:03.218168', 'step': 6229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:03.262762', 'step': 6229, 'epoch': 3} {'type': 'loss', 'content': 0.002059446182101965, 'timestamp': '2025-09-10 02:29:03.274682', 'step': 6230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:03.307936', 'step': 6230, 'epoch': 3} {'type': 'loss', 'content': 0.0014323127688840032, 'timestamp': '2025-09-10 02:29:03.315020', 'step': 6231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:03.347935', 'step': 6231, 'epoch': 3} {'type': 'loss', 'content': 0.0002452459593769163, 'timestamp': '2025-09-10 02:29:03.378692', 'step': 6232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:03.413946', 'step': 6232, 'epoch': 3} {'type': 'loss', 'content': 0.01287010032683611, 'timestamp': '2025-09-10 02:29:03.423501', 'step': 6233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:03.463502', 'step': 6233, 'epoch': 3} {'type': 'loss', 'content': 3.2527696021134034e-05, 'timestamp': '2025-09-10 02:29:03.470318', 'step': 6234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:03.508243', 'step': 6234, 'epoch': 3} {'type': 'loss', 'content': 0.00437973951920867, 'timestamp': '2025-09-10 02:29:03.515668', 'step': 6235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:03.552354', 'step': 6235, 'epoch': 3} {'type': 'loss', 'content': 0.010270438157022, 'timestamp': '2025-09-10 02:29:03.578323', 'step': 6236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:03.612892', 'step': 6236, 'epoch': 3} {'type': 'loss', 'content': 0.00011186213669134304, 'timestamp': '2025-09-10 02:29:03.619445', 'step': 6237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:03.654461', 'step': 6237, 'epoch': 3} {'type': 'loss', 'content': 0.00016674351354595274, 'timestamp': '2025-09-10 02:29:03.661753', 'step': 6238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:03.706916', 'step': 6238, 'epoch': 3} {'type': 'loss', 'content': 0.0005096830427646637, 'timestamp': '2025-09-10 02:29:03.714335', 'step': 6239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:03.750293', 'step': 6239, 'epoch': 3} {'type': 'loss', 'content': 8.230676030507311e-05, 'timestamp': '2025-09-10 02:29:03.777459', 'step': 6240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:29:03.816710', 'step': 6240, 'epoch': 3} {'type': 'loss', 'content': 0.00013677349488716573, 'timestamp': '2025-09-10 02:29:03.832166', 'step': 6241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:03.868850', 'step': 6241, 'epoch': 3} {'type': 'loss', 'content': 2.3667438654229045e-05, 'timestamp': '2025-09-10 02:29:03.880696', 'step': 6242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:03.915934', 'step': 6242, 'epoch': 3} {'type': 'loss', 'content': 0.00017127035243902355, 'timestamp': '2025-09-10 02:29:03.922666', 'step': 6243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:03.957512', 'step': 6243, 'epoch': 3} {'type': 'loss', 'content': 0.0002690852852538228, 'timestamp': '2025-09-10 02:29:03.985803', 'step': 6244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:04.022765', 'step': 6244, 'epoch': 3} {'type': 'loss', 'content': 0.00017910164024215192, 'timestamp': '2025-09-10 02:29:04.030531', 'step': 6245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:04.066882', 'step': 6245, 'epoch': 3} {'type': 'loss', 'content': 0.0005587777122855186, 'timestamp': '2025-09-10 02:29:04.074089', 'step': 6246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:29:04.118109', 'step': 6246, 'epoch': 3} {'type': 'loss', 'content': 0.0008401383529417217, 'timestamp': '2025-09-10 02:29:04.135472', 'step': 6247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:04.170553', 'step': 6247, 'epoch': 3} {'type': 'loss', 'content': 0.0020682509057223797, 'timestamp': '2025-09-10 02:29:04.201548', 'step': 6248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:04.236004', 'step': 6248, 'epoch': 3} {'type': 'loss', 'content': 0.0007200734107755125, 'timestamp': '2025-09-10 02:29:04.239906', 'step': 6249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:04.272787', 'step': 6249, 'epoch': 3} {'type': 'loss', 'content': 0.00019148035789839923, 'timestamp': '2025-09-10 02:29:04.279772', 'step': 6250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:04.315061', 'step': 6250, 'epoch': 3} {'type': 'loss', 'content': 4.394280040287413e-05, 'timestamp': '2025-09-10 02:29:04.317814', 'step': 6251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:04.351509', 'step': 6251, 'epoch': 3} {'type': 'loss', 'content': 0.0002166307531297207, 'timestamp': '2025-09-10 02:29:04.377873', 'step': 6252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:04.421383', 'step': 6252, 'epoch': 3} {'type': 'loss', 'content': 0.0003251233429182321, 'timestamp': '2025-09-10 02:29:04.428899', 'step': 6253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:04.467402', 'step': 6253, 'epoch': 3} {'type': 'loss', 'content': 0.00010890467092394829, 'timestamp': '2025-09-10 02:29:04.474674', 'step': 6254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:04.509098', 'step': 6254, 'epoch': 3} {'type': 'loss', 'content': 0.0005082925199531019, 'timestamp': '2025-09-10 02:29:04.511914', 'step': 6255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:04.545351', 'step': 6255, 'epoch': 3} {'type': 'loss', 'content': 0.0006019308930262923, 'timestamp': '2025-09-10 02:29:04.576810', 'step': 6256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:04.613745', 'step': 6256, 'epoch': 3} {'type': 'loss', 'content': 0.00034762476570904255, 'timestamp': '2025-09-10 02:29:04.617911', 'step': 6257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:04.650328', 'step': 6257, 'epoch': 3} {'type': 'loss', 'content': 0.00011139985144836828, 'timestamp': '2025-09-10 02:29:04.660196', 'step': 6258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:29:04.698657', 'step': 6258, 'epoch': 3} {'type': 'loss', 'content': 0.00037154555320739746, 'timestamp': '2025-09-10 02:29:04.712334', 'step': 6259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:04.746352', 'step': 6259, 'epoch': 3} {'type': 'loss', 'content': 0.00046982159256003797, 'timestamp': '2025-09-10 02:29:04.773897', 'step': 6260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:04.804817', 'step': 6260, 'epoch': 3} {'type': 'loss', 'content': 0.00027465628227218986, 'timestamp': '2025-09-10 02:29:04.807534', 'step': 6261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:04.838605', 'step': 6261, 'epoch': 3} {'type': 'loss', 'content': 0.0002272390847792849, 'timestamp': '2025-09-10 02:29:04.848663', 'step': 6262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:04.880669', 'step': 6262, 'epoch': 3} {'type': 'loss', 'content': 0.0003084797062911093, 'timestamp': '2025-09-10 02:29:04.888316', 'step': 6263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:04.924069', 'step': 6263, 'epoch': 3} {'type': 'loss', 'content': 0.0007478706538677216, 'timestamp': '2025-09-10 02:29:04.949180', 'step': 6264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:04.983852', 'step': 6264, 'epoch': 3} {'type': 'loss', 'content': 0.0004629208124242723, 'timestamp': '2025-09-10 02:29:04.992116', 'step': 6265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:05.025256', 'step': 6265, 'epoch': 3} {'type': 'loss', 'content': 0.0001887015241663903, 'timestamp': '2025-09-10 02:29:05.029368', 'step': 6266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:05.062885', 'step': 6266, 'epoch': 3} {'type': 'loss', 'content': 0.001650593476369977, 'timestamp': '2025-09-10 02:29:05.069594', 'step': 6267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:05.102767', 'step': 6267, 'epoch': 3} {'type': 'loss', 'content': 0.0003856061666738242, 'timestamp': '2025-09-10 02:29:05.130373', 'step': 6268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:05.164244', 'step': 6268, 'epoch': 3} {'type': 'loss', 'content': 0.0007830560207366943, 'timestamp': '2025-09-10 02:29:05.166809', 'step': 6269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:05.203631', 'step': 6269, 'epoch': 3} {'type': 'loss', 'content': 0.0001599523238837719, 'timestamp': '2025-09-10 02:29:05.216216', 'step': 6270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:05.249646', 'step': 6270, 'epoch': 3} {'type': 'loss', 'content': 0.0012749488232657313, 'timestamp': '2025-09-10 02:29:05.257268', 'step': 6271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:05.292066', 'step': 6271, 'epoch': 3} {'type': 'loss', 'content': 0.0036235000006854534, 'timestamp': '2025-09-10 02:29:05.321219', 'step': 6272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:05.365793', 'step': 6272, 'epoch': 3} {'type': 'loss', 'content': 0.0004903791705146432, 'timestamp': '2025-09-10 02:29:05.369874', 'step': 6273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:05.402955', 'step': 6273, 'epoch': 3} {'type': 'loss', 'content': 7.18557057552971e-05, 'timestamp': '2025-09-10 02:29:05.409791', 'step': 6274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:05.443844', 'step': 6274, 'epoch': 3} {'type': 'loss', 'content': 5.8820067351916805e-05, 'timestamp': '2025-09-10 02:29:05.455625', 'step': 6275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:05.491242', 'step': 6275, 'epoch': 3} {'type': 'loss', 'content': 0.0001387560332659632, 'timestamp': '2025-09-10 02:29:05.524569', 'step': 6276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:05.557513', 'step': 6276, 'epoch': 3} {'type': 'loss', 'content': 8.674280252307653e-05, 'timestamp': '2025-09-10 02:29:05.561570', 'step': 6277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:05.597546', 'step': 6277, 'epoch': 3} {'type': 'loss', 'content': 0.0016429023817181587, 'timestamp': '2025-09-10 02:29:05.602308', 'step': 6278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:05.635431', 'step': 6278, 'epoch': 3} {'type': 'loss', 'content': 0.0003711978788487613, 'timestamp': '2025-09-10 02:29:05.642513', 'step': 6279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:05.678251', 'step': 6279, 'epoch': 3} {'type': 'loss', 'content': 0.0036703094374388456, 'timestamp': '2025-09-10 02:29:05.706203', 'step': 6280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:05.739233', 'step': 6280, 'epoch': 3} {'type': 'loss', 'content': 0.0004256887186784297, 'timestamp': '2025-09-10 02:29:05.747299', 'step': 6281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:05.780857', 'step': 6281, 'epoch': 3} {'type': 'loss', 'content': 0.00036882911808788776, 'timestamp': '2025-09-10 02:29:05.788219', 'step': 6282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:29:05.826181', 'step': 6282, 'epoch': 3} {'type': 'loss', 'content': 0.0006809058249928057, 'timestamp': '2025-09-10 02:29:05.839544', 'step': 6283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:05.874700', 'step': 6283, 'epoch': 3} {'type': 'loss', 'content': 0.0020839818753302097, 'timestamp': '2025-09-10 02:29:05.899681', 'step': 6284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:05.934688', 'step': 6284, 'epoch': 3} {'type': 'loss', 'content': 0.00040493832784704864, 'timestamp': '2025-09-10 02:29:05.940112', 'step': 6285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:29:05.978214', 'step': 6285, 'epoch': 3} {'type': 'loss', 'content': 8.699164027348161e-05, 'timestamp': '2025-09-10 02:29:05.991891', 'step': 6286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:06.026523', 'step': 6286, 'epoch': 3} {'type': 'loss', 'content': 0.00018825959705282003, 'timestamp': '2025-09-10 02:29:06.038487', 'step': 6287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:06.072604', 'step': 6287, 'epoch': 3} {'type': 'loss', 'content': 0.0004126779385842383, 'timestamp': '2025-09-10 02:29:06.097564', 'step': 6288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:06.128927', 'step': 6288, 'epoch': 3} {'type': 'loss', 'content': 0.02210475504398346, 'timestamp': '2025-09-10 02:29:06.131446', 'step': 6289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:06.164947', 'step': 6289, 'epoch': 3} {'type': 'loss', 'content': 0.0003806811582762748, 'timestamp': '2025-09-10 02:29:06.171748', 'step': 6290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:06.203970', 'step': 6290, 'epoch': 3} {'type': 'loss', 'content': 0.002564129186794162, 'timestamp': '2025-09-10 02:29:06.208366', 'step': 6291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:06.240294', 'step': 6291, 'epoch': 3} {'type': 'loss', 'content': 0.0007361977477557957, 'timestamp': '2025-09-10 02:29:06.268273', 'step': 6292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:06.299459', 'step': 6292, 'epoch': 3} {'type': 'loss', 'content': 4.1402661736356094e-05, 'timestamp': '2025-09-10 02:29:06.301852', 'step': 6293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:06.333635', 'step': 6293, 'epoch': 3} {'type': 'loss', 'content': 5.298778341966681e-05, 'timestamp': '2025-09-10 02:29:06.340760', 'step': 6294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:06.373775', 'step': 6294, 'epoch': 3} {'type': 'loss', 'content': 0.04112233966588974, 'timestamp': '2025-09-10 02:29:06.381249', 'step': 6295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:06.413683', 'step': 6295, 'epoch': 3} {'type': 'loss', 'content': 0.0010367208160459995, 'timestamp': '2025-09-10 02:29:06.441519', 'step': 6296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:06.473067', 'step': 6296, 'epoch': 3} {'type': 'loss', 'content': 0.000295735226245597, 'timestamp': '2025-09-10 02:29:06.482727', 'step': 6297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:06.515480', 'step': 6297, 'epoch': 3} {'type': 'loss', 'content': 0.00019989509019069374, 'timestamp': '2025-09-10 02:29:06.522560', 'step': 6298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:06.553317', 'step': 6298, 'epoch': 3} {'type': 'loss', 'content': 8.843276737025008e-05, 'timestamp': '2025-09-10 02:29:06.564281', 'step': 6299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:06.596144', 'step': 6299, 'epoch': 3} {'type': 'loss', 'content': 0.00021590027608908713, 'timestamp': '2025-09-10 02:29:06.629593', 'step': 6300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:06.661426', 'step': 6300, 'epoch': 3} {'type': 'loss', 'content': 5.1329996495041996e-05, 'timestamp': '2025-09-10 02:29:06.665540', 'step': 6301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:06.698119', 'step': 6301, 'epoch': 3} {'type': 'loss', 'content': 0.0038885578978806734, 'timestamp': '2025-09-10 02:29:06.707892', 'step': 6302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:06.740805', 'step': 6302, 'epoch': 3} {'type': 'loss', 'content': 0.00019760747090913355, 'timestamp': '2025-09-10 02:29:06.747974', 'step': 6303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:06.780375', 'step': 6303, 'epoch': 3} {'type': 'loss', 'content': 1.984067785087973e-05, 'timestamp': '2025-09-10 02:29:06.811653', 'step': 6304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:06.843758', 'step': 6304, 'epoch': 3} {'type': 'loss', 'content': 0.0008169592474587262, 'timestamp': '2025-09-10 02:29:06.851168', 'step': 6305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:06.882525', 'step': 6305, 'epoch': 3} {'type': 'loss', 'content': 0.0003005763574037701, 'timestamp': '2025-09-10 02:29:06.890220', 'step': 6306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:06.921053', 'step': 6306, 'epoch': 3} {'type': 'loss', 'content': 9.076990681933239e-05, 'timestamp': '2025-09-10 02:29:06.923924', 'step': 6307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:06.955910', 'step': 6307, 'epoch': 3} {'type': 'loss', 'content': 5.251143375062384e-05, 'timestamp': '2025-09-10 02:29:06.988551', 'step': 6308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:07.020723', 'step': 6308, 'epoch': 3} {'type': 'loss', 'content': 8.822443487588316e-05, 'timestamp': '2025-09-10 02:29:07.025564', 'step': 6309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:07.056508', 'step': 6309, 'epoch': 3} {'type': 'loss', 'content': 0.00012206012615934014, 'timestamp': '2025-09-10 02:29:07.059078', 'step': 6310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:07.092205', 'step': 6310, 'epoch': 3} {'type': 'loss', 'content': 6.703573308186606e-05, 'timestamp': '2025-09-10 02:29:07.102240', 'step': 6311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:07.134448', 'step': 6311, 'epoch': 3} {'type': 'loss', 'content': 7.877570897107944e-05, 'timestamp': '2025-09-10 02:29:07.165292', 'step': 6312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:07.197350', 'step': 6312, 'epoch': 3} {'type': 'loss', 'content': 0.00010713493247749284, 'timestamp': '2025-09-10 02:29:07.202384', 'step': 6313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:29:07.235524', 'step': 6313, 'epoch': 3} {'type': 'loss', 'content': 3.199342609150335e-05, 'timestamp': '2025-09-10 02:29:07.248852', 'step': 6314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:07.280036', 'step': 6314, 'epoch': 3} {'type': 'loss', 'content': 0.02364686317741871, 'timestamp': '2025-09-10 02:29:07.283883', 'step': 6315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:07.315780', 'step': 6315, 'epoch': 3} {'type': 'loss', 'content': 0.0002355035103391856, 'timestamp': '2025-09-10 02:29:07.348371', 'step': 6316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:07.383001', 'step': 6316, 'epoch': 3} {'type': 'loss', 'content': 0.00014525903679896146, 'timestamp': '2025-09-10 02:29:07.388034', 'step': 6317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:07.419491', 'step': 6317, 'epoch': 3} {'type': 'loss', 'content': 0.00019105462706647813, 'timestamp': '2025-09-10 02:29:07.431764', 'step': 6318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:07.463177', 'step': 6318, 'epoch': 3} {'type': 'loss', 'content': 0.00010136031778529286, 'timestamp': '2025-09-10 02:29:07.469800', 'step': 6319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:07.500713', 'step': 6319, 'epoch': 3} {'type': 'loss', 'content': 0.00030389634775929153, 'timestamp': '2025-09-10 02:29:07.528449', 'step': 6320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:07.559425', 'step': 6320, 'epoch': 3} {'type': 'loss', 'content': 0.0005676125292666256, 'timestamp': '2025-09-10 02:29:07.564296', 'step': 6321, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:29:18.130853', 'step': 6321, 'epoch': 3} {'type': 'pplx', 'content': 22779605.39083871, 'timestamp': '2025-09-10 02:29:18.134652', 'step': 6321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:18.167347', 'step': 6321, 'epoch': 3} {'type': 'loss', 'content': 0.000332222378347069, 'timestamp': '2025-09-10 02:29:18.173098', 'step': 6322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:18.205647', 'step': 6322, 'epoch': 3} {'type': 'loss', 'content': 0.003474770812317729, 'timestamp': '2025-09-10 02:29:18.209511', 'step': 6323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:18.240090', 'step': 6323, 'epoch': 3} {'type': 'loss', 'content': 0.0007376385619863868, 'timestamp': '2025-09-10 02:29:18.267870', 'step': 6324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:18.306032', 'step': 6324, 'epoch': 3} {'type': 'loss', 'content': 0.021130096167325974, 'timestamp': '2025-09-10 02:29:18.310959', 'step': 6325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:18.343599', 'step': 6325, 'epoch': 3} {'type': 'loss', 'content': 0.0008156650001183152, 'timestamp': '2025-09-10 02:29:18.351079', 'step': 6326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:29:18.391999', 'step': 6326, 'epoch': 3} {'type': 'loss', 'content': 4.109603105462156e-05, 'timestamp': '2025-09-10 02:29:18.405697', 'step': 6327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:29:18.445889', 'step': 6327, 'epoch': 3} {'type': 'loss', 'content': 0.0003445267793722451, 'timestamp': '2025-09-10 02:29:18.482728', 'step': 6328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:18.517430', 'step': 6328, 'epoch': 3} {'type': 'loss', 'content': 0.00021596229635179043, 'timestamp': '2025-09-10 02:29:18.522150', 'step': 6329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:18.554164', 'step': 6329, 'epoch': 3} {'type': 'loss', 'content': 0.0005771416472271085, 'timestamp': '2025-09-10 02:29:18.564176', 'step': 6330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:18.602787', 'step': 6330, 'epoch': 3} {'type': 'loss', 'content': 0.0002720944758038968, 'timestamp': '2025-09-10 02:29:18.609250', 'step': 6331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:18.640525', 'step': 6331, 'epoch': 3} {'type': 'loss', 'content': 0.0016264248406514525, 'timestamp': '2025-09-10 02:29:18.669868', 'step': 6332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:18.701530', 'step': 6332, 'epoch': 3} {'type': 'loss', 'content': 0.00010872381972149014, 'timestamp': '2025-09-10 02:29:18.711391', 'step': 6333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:18.743000', 'step': 6333, 'epoch': 3} {'type': 'loss', 'content': 0.00038386922096833587, 'timestamp': '2025-09-10 02:29:18.749953', 'step': 6334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:18.780151', 'step': 6334, 'epoch': 3} {'type': 'loss', 'content': 0.0001077132037607953, 'timestamp': '2025-09-10 02:29:18.787758', 'step': 6335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:18.819772', 'step': 6335, 'epoch': 3} {'type': 'loss', 'content': 0.0004933988093398511, 'timestamp': '2025-09-10 02:29:18.853286', 'step': 6336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:18.885241', 'step': 6336, 'epoch': 3} {'type': 'loss', 'content': 0.0017707353690639138, 'timestamp': '2025-09-10 02:29:18.890646', 'step': 6337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:18.921242', 'step': 6337, 'epoch': 3} {'type': 'loss', 'content': 0.00026189981144852936, 'timestamp': '2025-09-10 02:29:18.929151', 'step': 6338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:18.960376', 'step': 6338, 'epoch': 3} {'type': 'loss', 'content': 0.07036115974187851, 'timestamp': '2025-09-10 02:29:18.967898', 'step': 6339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:19.000684', 'step': 6339, 'epoch': 3} {'type': 'loss', 'content': 0.00025524134980514646, 'timestamp': '2025-09-10 02:29:19.033580', 'step': 6340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:19.063853', 'step': 6340, 'epoch': 3} {'type': 'loss', 'content': 0.00033761485246941447, 'timestamp': '2025-09-10 02:29:19.068626', 'step': 6341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:19.099684', 'step': 6341, 'epoch': 3} {'type': 'loss', 'content': 0.0010502010118216276, 'timestamp': '2025-09-10 02:29:19.104150', 'step': 6342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:19.136087', 'step': 6342, 'epoch': 3} {'type': 'loss', 'content': 0.0003272096801083535, 'timestamp': '2025-09-10 02:29:19.143049', 'step': 6343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:19.174394', 'step': 6343, 'epoch': 3} {'type': 'loss', 'content': 1.662676368141547e-05, 'timestamp': '2025-09-10 02:29:19.205407', 'step': 6344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:19.235942', 'step': 6344, 'epoch': 3} {'type': 'loss', 'content': 0.00015760198584757745, 'timestamp': '2025-09-10 02:29:19.238088', 'step': 6345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:19.269413', 'step': 6345, 'epoch': 3} {'type': 'loss', 'content': 0.0003509795351419598, 'timestamp': '2025-09-10 02:29:19.279455', 'step': 6346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:19.309940', 'step': 6346, 'epoch': 3} {'type': 'loss', 'content': 0.002398541197180748, 'timestamp': '2025-09-10 02:29:19.316540', 'step': 6347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:19.347416', 'step': 6347, 'epoch': 3} {'type': 'loss', 'content': 6.516561552416533e-05, 'timestamp': '2025-09-10 02:29:19.378249', 'step': 6348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:19.408493', 'step': 6348, 'epoch': 3} {'type': 'loss', 'content': 7.48638849472627e-05, 'timestamp': '2025-09-10 02:29:19.410610', 'step': 6349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:19.441360', 'step': 6349, 'epoch': 3} {'type': 'loss', 'content': 3.105393989244476e-05, 'timestamp': '2025-09-10 02:29:19.445757', 'step': 6350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:19.477340', 'step': 6350, 'epoch': 3} {'type': 'loss', 'content': 0.00025512793217785656, 'timestamp': '2025-09-10 02:29:19.484997', 'step': 6351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:19.515787', 'step': 6351, 'epoch': 3} {'type': 'loss', 'content': 0.0005597950075753033, 'timestamp': '2025-09-10 02:29:19.540639', 'step': 6352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:19.571736', 'step': 6352, 'epoch': 3} {'type': 'loss', 'content': 0.0011579144047573209, 'timestamp': '2025-09-10 02:29:19.575426', 'step': 6353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:19.607848', 'step': 6353, 'epoch': 3} {'type': 'loss', 'content': 8.454350609099492e-05, 'timestamp': '2025-09-10 02:29:19.615373', 'step': 6354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:19.647231', 'step': 6354, 'epoch': 3} {'type': 'loss', 'content': 5.9956550103379413e-05, 'timestamp': '2025-09-10 02:29:19.656938', 'step': 6355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:19.687539', 'step': 6355, 'epoch': 3} {'type': 'loss', 'content': 3.7725461879745126e-05, 'timestamp': '2025-09-10 02:29:19.715332', 'step': 6356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:19.748381', 'step': 6356, 'epoch': 3} {'type': 'loss', 'content': 0.00045506874448619783, 'timestamp': '2025-09-10 02:29:19.757975', 'step': 6357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:19.789077', 'step': 6357, 'epoch': 3} {'type': 'loss', 'content': 0.005231906659901142, 'timestamp': '2025-09-10 02:29:19.795938', 'step': 6358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:19.827714', 'step': 6358, 'epoch': 3} {'type': 'loss', 'content': 0.0005539475823752582, 'timestamp': '2025-09-10 02:29:19.834445', 'step': 6359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:19.865423', 'step': 6359, 'epoch': 3} {'type': 'loss', 'content': 0.00410681264474988, 'timestamp': '2025-09-10 02:29:19.893284', 'step': 6360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:19.923869', 'step': 6360, 'epoch': 3} {'type': 'loss', 'content': 0.0008158148848451674, 'timestamp': '2025-09-10 02:29:19.929029', 'step': 6361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:29:19.963810', 'step': 6361, 'epoch': 3} {'type': 'loss', 'content': 0.0011450514430180192, 'timestamp': '2025-09-10 02:29:19.977531', 'step': 6362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:20.009029', 'step': 6362, 'epoch': 3} {'type': 'loss', 'content': 0.0005734324222430587, 'timestamp': '2025-09-10 02:29:20.012932', 'step': 6363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:20.044053', 'step': 6363, 'epoch': 3} {'type': 'loss', 'content': 8.529059414286166e-05, 'timestamp': '2025-09-10 02:29:20.072584', 'step': 6364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:20.105593', 'step': 6364, 'epoch': 3} {'type': 'loss', 'content': 0.022070255130529404, 'timestamp': '2025-09-10 02:29:20.110631', 'step': 6365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:20.150185', 'step': 6365, 'epoch': 3} {'type': 'loss', 'content': 0.0003581468772608787, 'timestamp': '2025-09-10 02:29:20.157700', 'step': 6366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:29:20.202522', 'step': 6366, 'epoch': 3} {'type': 'loss', 'content': 0.015283132903277874, 'timestamp': '2025-09-10 02:29:20.218121', 'step': 6367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:20.249342', 'step': 6367, 'epoch': 3} {'type': 'loss', 'content': 0.00029461071244440973, 'timestamp': '2025-09-10 02:29:20.277746', 'step': 6368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:20.310632', 'step': 6368, 'epoch': 3} {'type': 'loss', 'content': 0.010076651349663734, 'timestamp': '2025-09-10 02:29:20.315613', 'step': 6369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:20.346636', 'step': 6369, 'epoch': 3} {'type': 'loss', 'content': 0.0035657952539622784, 'timestamp': '2025-09-10 02:29:20.353572', 'step': 6370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:29:20.391678', 'step': 6370, 'epoch': 3} {'type': 'loss', 'content': 0.010319511406123638, 'timestamp': '2025-09-10 02:29:20.407379', 'step': 6371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:29:20.448636', 'step': 6371, 'epoch': 3} {'type': 'loss', 'content': 0.00019968757987953722, 'timestamp': '2025-09-10 02:29:20.486664', 'step': 6372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:20.517082', 'step': 6372, 'epoch': 3} {'type': 'loss', 'content': 0.0001550498272990808, 'timestamp': '2025-09-10 02:29:20.521556', 'step': 6373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:20.552236', 'step': 6373, 'epoch': 3} {'type': 'loss', 'content': 0.0003731503675226122, 'timestamp': '2025-09-10 02:29:20.559068', 'step': 6374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:20.589976', 'step': 6374, 'epoch': 3} {'type': 'loss', 'content': 0.00011220359738217667, 'timestamp': '2025-09-10 02:29:20.602529', 'step': 6375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:20.636903', 'step': 6375, 'epoch': 3} {'type': 'loss', 'content': 0.0002799866779241711, 'timestamp': '2025-09-10 02:29:20.670381', 'step': 6376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 14712978242368}, 'timestamp': '2025-09-10 02:29:20.709953', 'step': 6376, 'epoch': 3} {'type': 'loss', 'content': 0.003151228418573737, 'timestamp': '2025-09-10 02:29:20.727181', 'step': 6377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:20.758329', 'step': 6377, 'epoch': 3} {'type': 'loss', 'content': 0.00033930037170648575, 'timestamp': '2025-09-10 02:29:20.770815', 'step': 6378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:20.801897', 'step': 6378, 'epoch': 3} {'type': 'loss', 'content': 6.999308243393898e-05, 'timestamp': '2025-09-10 02:29:20.808629', 'step': 6379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:20.839608', 'step': 6379, 'epoch': 3} {'type': 'loss', 'content': 0.00043307337909936905, 'timestamp': '2025-09-10 02:29:20.864987', 'step': 6380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:20.895725', 'step': 6380, 'epoch': 3} {'type': 'loss', 'content': 0.00034244899870827794, 'timestamp': '2025-09-10 02:29:20.898013', 'step': 6381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:20.933233', 'step': 6381, 'epoch': 3} {'type': 'loss', 'content': 0.0011802476365119219, 'timestamp': '2025-09-10 02:29:20.945269', 'step': 6382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:20.982320', 'step': 6382, 'epoch': 3} {'type': 'loss', 'content': 0.0002502583956811577, 'timestamp': '2025-09-10 02:29:20.993226', 'step': 6383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:21.030608', 'step': 6383, 'epoch': 3} {'type': 'loss', 'content': 0.0013230193872004747, 'timestamp': '2025-09-10 02:29:21.058653', 'step': 6384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:21.093631', 'step': 6384, 'epoch': 3} {'type': 'loss', 'content': 0.0048297131434082985, 'timestamp': '2025-09-10 02:29:21.096468', 'step': 6385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:21.129065', 'step': 6385, 'epoch': 3} {'type': 'loss', 'content': 1.7518630556878634e-05, 'timestamp': '2025-09-10 02:29:21.134528', 'step': 6386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:21.166246', 'step': 6386, 'epoch': 3} {'type': 'loss', 'content': 0.0001769508671713993, 'timestamp': '2025-09-10 02:29:21.173212', 'step': 6387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:21.205453', 'step': 6387, 'epoch': 3} {'type': 'loss', 'content': 7.868801185395569e-05, 'timestamp': '2025-09-10 02:29:21.236430', 'step': 6388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:21.272399', 'step': 6388, 'epoch': 3} {'type': 'loss', 'content': 0.0004954601754434407, 'timestamp': '2025-09-10 02:29:21.281350', 'step': 6389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:21.315513', 'step': 6389, 'epoch': 3} {'type': 'loss', 'content': 0.0008721998310647905, 'timestamp': '2025-09-10 02:29:21.327525', 'step': 6390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:21.361599', 'step': 6390, 'epoch': 3} {'type': 'loss', 'content': 9.93388457573019e-05, 'timestamp': '2025-09-10 02:29:21.368612', 'step': 6391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:21.400136', 'step': 6391, 'epoch': 3} {'type': 'loss', 'content': 6.254738400457427e-05, 'timestamp': '2025-09-10 02:29:21.424203', 'step': 6392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:21.458021', 'step': 6392, 'epoch': 3} {'type': 'loss', 'content': 0.0006808959878981113, 'timestamp': '2025-09-10 02:29:21.466527', 'step': 6393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:29:21.508602', 'step': 6393, 'epoch': 3} {'type': 'loss', 'content': 0.00033295477624051273, 'timestamp': '2025-09-10 02:29:21.524809', 'step': 6394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:21.557536', 'step': 6394, 'epoch': 3} {'type': 'loss', 'content': 0.0014583735028281808, 'timestamp': '2025-09-10 02:29:21.565081', 'step': 6395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:29:21.603694', 'step': 6395, 'epoch': 3} {'type': 'loss', 'content': 0.000546331750229001, 'timestamp': '2025-09-10 02:29:21.638539', 'step': 6396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:21.673034', 'step': 6396, 'epoch': 3} {'type': 'loss', 'content': 0.00026958558009937406, 'timestamp': '2025-09-10 02:29:21.676535', 'step': 6397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:29:21.712740', 'step': 6397, 'epoch': 3} {'type': 'loss', 'content': 0.00034523935755714774, 'timestamp': '2025-09-10 02:29:21.726607', 'step': 6398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:21.761790', 'step': 6398, 'epoch': 3} {'type': 'loss', 'content': 0.00016174910706467927, 'timestamp': '2025-09-10 02:29:21.771121', 'step': 6399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:21.805907', 'step': 6399, 'epoch': 3} {'type': 'loss', 'content': 0.0008864761330187321, 'timestamp': '2025-09-10 02:29:21.833966', 'step': 6400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:21.865231', 'step': 6400, 'epoch': 3} {'type': 'loss', 'content': 0.0001508051936980337, 'timestamp': '2025-09-10 02:29:21.869796', 'step': 6401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:29:21.906240', 'step': 6401, 'epoch': 3} {'type': 'loss', 'content': 0.0057227760553359985, 'timestamp': '2025-09-10 02:29:21.919569', 'step': 6402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:21.951281', 'step': 6402, 'epoch': 3} {'type': 'loss', 'content': 0.02214057371020317, 'timestamp': '2025-09-10 02:29:21.963275', 'step': 6403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:22.004341', 'step': 6403, 'epoch': 3} {'type': 'loss', 'content': 0.0005699954344891012, 'timestamp': '2025-09-10 02:29:22.035455', 'step': 6404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:22.066677', 'step': 6404, 'epoch': 3} {'type': 'loss', 'content': 0.000413557660067454, 'timestamp': '2025-09-10 02:29:22.069007', 'step': 6405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:22.099892', 'step': 6405, 'epoch': 3} {'type': 'loss', 'content': 0.00027196184964850545, 'timestamp': '2025-09-10 02:29:22.107338', 'step': 6406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:22.144101', 'step': 6406, 'epoch': 3} {'type': 'loss', 'content': 0.00011958154937019572, 'timestamp': '2025-09-10 02:29:22.151018', 'step': 6407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:22.182768', 'step': 6407, 'epoch': 3} {'type': 'loss', 'content': 0.0002388076245551929, 'timestamp': '2025-09-10 02:29:22.206859', 'step': 6408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:22.238367', 'step': 6408, 'epoch': 3} {'type': 'loss', 'content': 0.0002621088642627001, 'timestamp': '2025-09-10 02:29:22.246894', 'step': 6409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:22.278944', 'step': 6409, 'epoch': 3} {'type': 'loss', 'content': 0.0003606698883231729, 'timestamp': '2025-09-10 02:29:22.289087', 'step': 6410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:22.323509', 'step': 6410, 'epoch': 3} {'type': 'loss', 'content': 0.006846979726105928, 'timestamp': '2025-09-10 02:29:22.335043', 'step': 6411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:22.369704', 'step': 6411, 'epoch': 3} {'type': 'loss', 'content': 0.0006495703128166497, 'timestamp': '2025-09-10 02:29:22.398294', 'step': 6412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:29:22.431201', 'step': 6412, 'epoch': 3} {'type': 'loss', 'content': 0.001449008472263813, 'timestamp': '2025-09-10 02:29:22.444220', 'step': 6413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:29:22.482765', 'step': 6413, 'epoch': 3} {'type': 'loss', 'content': 0.04917486757040024, 'timestamp': '2025-09-10 02:29:22.498676', 'step': 6414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:22.529859', 'step': 6414, 'epoch': 3} {'type': 'loss', 'content': 0.00042798795038834214, 'timestamp': '2025-09-10 02:29:22.537243', 'step': 6415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:22.568292', 'step': 6415, 'epoch': 3} {'type': 'loss', 'content': 0.00018693515448831022, 'timestamp': '2025-09-10 02:29:22.593097', 'step': 6416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:29:22.625685', 'step': 6416, 'epoch': 3} {'type': 'loss', 'content': 0.00015946182247716933, 'timestamp': '2025-09-10 02:29:22.638362', 'step': 6417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:22.669691', 'step': 6417, 'epoch': 3} {'type': 'loss', 'content': 3.3737265766831115e-05, 'timestamp': '2025-09-10 02:29:22.679917', 'step': 6418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:22.712098', 'step': 6418, 'epoch': 3} {'type': 'loss', 'content': 8.317730680573732e-05, 'timestamp': '2025-09-10 02:29:22.722150', 'step': 6419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:22.753156', 'step': 6419, 'epoch': 3} {'type': 'loss', 'content': 0.010926821269094944, 'timestamp': '2025-09-10 02:29:22.781553', 'step': 6420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:22.813347', 'step': 6420, 'epoch': 3} {'type': 'loss', 'content': 0.00013701205898541957, 'timestamp': '2025-09-10 02:29:22.817704', 'step': 6421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:22.850116', 'step': 6421, 'epoch': 3} {'type': 'loss', 'content': 0.0004380210011731833, 'timestamp': '2025-09-10 02:29:22.862568', 'step': 6422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:22.893942', 'step': 6422, 'epoch': 3} {'type': 'loss', 'content': 0.00029581101262010634, 'timestamp': '2025-09-10 02:29:22.901232', 'step': 6423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:22.932650', 'step': 6423, 'epoch': 3} {'type': 'loss', 'content': 0.00028576585464179516, 'timestamp': '2025-09-10 02:29:22.963633', 'step': 6424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:22.994670', 'step': 6424, 'epoch': 3} {'type': 'loss', 'content': 0.0003905796620529145, 'timestamp': '2025-09-10 02:29:22.999676', 'step': 6425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:23.030989', 'step': 6425, 'epoch': 3} {'type': 'loss', 'content': 0.0002705628867261112, 'timestamp': '2025-09-10 02:29:23.038009', 'step': 6426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:29:23.072734', 'step': 6426, 'epoch': 3} {'type': 'loss', 'content': 0.02130054123699665, 'timestamp': '2025-09-10 02:29:23.086423', 'step': 6427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:23.118615', 'step': 6427, 'epoch': 3} {'type': 'loss', 'content': 0.0003769658214878291, 'timestamp': '2025-09-10 02:29:23.146259', 'step': 6428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:23.180368', 'step': 6428, 'epoch': 3} {'type': 'loss', 'content': 0.00011719001486198977, 'timestamp': '2025-09-10 02:29:23.182825', 'step': 6429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:23.214238', 'step': 6429, 'epoch': 3} {'type': 'loss', 'content': 0.0029267126228660345, 'timestamp': '2025-09-10 02:29:23.221867', 'step': 6430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:23.252492', 'step': 6430, 'epoch': 3} {'type': 'loss', 'content': 0.00013307588233146816, 'timestamp': '2025-09-10 02:29:23.255084', 'step': 6431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 15662185694400}, 'timestamp': '2025-09-10 02:29:23.300832', 'step': 6431, 'epoch': 3} {'type': 'loss', 'content': 0.0002167918864870444, 'timestamp': '2025-09-10 02:29:23.340946', 'step': 6432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:23.380369', 'step': 6432, 'epoch': 3} {'type': 'loss', 'content': 0.00011096680100308731, 'timestamp': '2025-09-10 02:29:23.384766', 'step': 6433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:23.424786', 'step': 6433, 'epoch': 3} {'type': 'loss', 'content': 6.910169031471014e-05, 'timestamp': '2025-09-10 02:29:23.437350', 'step': 6434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:23.470228', 'step': 6434, 'epoch': 3} {'type': 'loss', 'content': 0.004474697168916464, 'timestamp': '2025-09-10 02:29:23.475217', 'step': 6435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:23.510567', 'step': 6435, 'epoch': 3} {'type': 'loss', 'content': 0.0006678365753032267, 'timestamp': '2025-09-10 02:29:23.542157', 'step': 6436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:23.583432', 'step': 6436, 'epoch': 3} {'type': 'loss', 'content': 0.005918839015066624, 'timestamp': '2025-09-10 02:29:23.586403', 'step': 6437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:29:23.626467', 'step': 6437, 'epoch': 3} {'type': 'loss', 'content': 0.0001482577354181558, 'timestamp': '2025-09-10 02:29:23.640472', 'step': 6438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:23.677715', 'step': 6438, 'epoch': 3} {'type': 'loss', 'content': 0.00014806709077674896, 'timestamp': '2025-09-10 02:29:23.687915', 'step': 6439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:23.725397', 'step': 6439, 'epoch': 3} {'type': 'loss', 'content': 0.0005189132643863559, 'timestamp': '2025-09-10 02:29:23.754067', 'step': 6440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:29:23.787062', 'step': 6440, 'epoch': 3} {'type': 'loss', 'content': 0.00017340357590001076, 'timestamp': '2025-09-10 02:29:23.800043', 'step': 6441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:23.831000', 'step': 6441, 'epoch': 3} {'type': 'loss', 'content': 0.030487949028611183, 'timestamp': '2025-09-10 02:29:23.838852', 'step': 6442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:23.869448', 'step': 6442, 'epoch': 3} {'type': 'loss', 'content': 0.026207389310002327, 'timestamp': '2025-09-10 02:29:23.876351', 'step': 6443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:23.907430', 'step': 6443, 'epoch': 3} {'type': 'loss', 'content': 0.00012488577340263873, 'timestamp': '2025-09-10 02:29:23.931293', 'step': 6444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:23.961966', 'step': 6444, 'epoch': 3} {'type': 'loss', 'content': 0.00043719136738218367, 'timestamp': '2025-09-10 02:29:23.964116', 'step': 6445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:24.004712', 'step': 6445, 'epoch': 3} {'type': 'loss', 'content': 0.0001817305019358173, 'timestamp': '2025-09-10 02:29:24.011635', 'step': 6446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:29:24.052007', 'step': 6446, 'epoch': 3} {'type': 'loss', 'content': 0.00037571805296465755, 'timestamp': '2025-09-10 02:29:24.067930', 'step': 6447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:24.099761', 'step': 6447, 'epoch': 3} {'type': 'loss', 'content': 0.00024322188983205706, 'timestamp': '2025-09-10 02:29:24.125144', 'step': 6448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:24.157642', 'step': 6448, 'epoch': 3} {'type': 'loss', 'content': 0.0034805149771273136, 'timestamp': '2025-09-10 02:29:24.163103', 'step': 6449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:24.193931', 'step': 6449, 'epoch': 3} {'type': 'loss', 'content': 8.158596756402403e-05, 'timestamp': '2025-09-10 02:29:24.201350', 'step': 6450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:24.231856', 'step': 6450, 'epoch': 3} {'type': 'loss', 'content': 0.0003348083992023021, 'timestamp': '2025-09-10 02:29:24.235749', 'step': 6451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:24.266402', 'step': 6451, 'epoch': 3} {'type': 'loss', 'content': 0.001363294431939721, 'timestamp': '2025-09-10 02:29:24.294172', 'step': 6452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:29:24.325874', 'step': 6452, 'epoch': 3} {'type': 'loss', 'content': 0.0007565673440694809, 'timestamp': '2025-09-10 02:29:24.338554', 'step': 6453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:24.370268', 'step': 6453, 'epoch': 3} {'type': 'loss', 'content': 3.2014875614549965e-05, 'timestamp': '2025-09-10 02:29:24.378144', 'step': 6454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:24.408859', 'step': 6454, 'epoch': 3} {'type': 'loss', 'content': 8.36865438031964e-05, 'timestamp': '2025-09-10 02:29:24.416537', 'step': 6455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:24.447271', 'step': 6455, 'epoch': 3} {'type': 'loss', 'content': 0.00034000244340859354, 'timestamp': '2025-09-10 02:29:24.472591', 'step': 6456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 18035204324480}, 'timestamp': '2025-09-10 02:29:24.522668', 'step': 6456, 'epoch': 3} {'type': 'loss', 'content': 0.0011615986004471779, 'timestamp': '2025-09-10 02:29:24.544241', 'step': 6457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:24.576145', 'step': 6457, 'epoch': 3} {'type': 'loss', 'content': 0.0007336985436268151, 'timestamp': '2025-09-10 02:29:24.578766', 'step': 6458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:24.610449', 'step': 6458, 'epoch': 3} {'type': 'loss', 'content': 2.4305074475705624e-05, 'timestamp': '2025-09-10 02:29:24.622480', 'step': 6459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:24.655440', 'step': 6459, 'epoch': 3} {'type': 'loss', 'content': 0.0005083387950435281, 'timestamp': '2025-09-10 02:29:24.680290', 'step': 6460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:24.711201', 'step': 6460, 'epoch': 3} {'type': 'loss', 'content': 0.0002868285810109228, 'timestamp': '2025-09-10 02:29:24.713814', 'step': 6461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:24.744724', 'step': 6461, 'epoch': 3} {'type': 'loss', 'content': 6.002017835271545e-05, 'timestamp': '2025-09-10 02:29:24.748333', 'step': 6462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:24.781362', 'step': 6462, 'epoch': 3} {'type': 'loss', 'content': 9.268764551961794e-05, 'timestamp': '2025-09-10 02:29:24.783970', 'step': 6463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:24.814060', 'step': 6463, 'epoch': 3} {'type': 'loss', 'content': 0.0001622526760911569, 'timestamp': '2025-09-10 02:29:24.837871', 'step': 6464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 13289167064320}, 'timestamp': '2025-09-10 02:29:24.877032', 'step': 6464, 'epoch': 3} {'type': 'loss', 'content': 0.0037606866098940372, 'timestamp': '2025-09-10 02:29:24.892897', 'step': 6465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:29:24.927792', 'step': 6465, 'epoch': 3} {'type': 'loss', 'content': 0.0015440176939591765, 'timestamp': '2025-09-10 02:29:24.941588', 'step': 6466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:24.973250', 'step': 6466, 'epoch': 3} {'type': 'loss', 'content': 0.00043152968282811344, 'timestamp': '2025-09-10 02:29:24.983279', 'step': 6467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:25.014538', 'step': 6467, 'epoch': 3} {'type': 'loss', 'content': 0.0006583757349289954, 'timestamp': '2025-09-10 02:29:25.045133', 'step': 6468, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:29:35.349732', 'step': 6468, 'epoch': 3} {'type': 'pplx', 'content': 22255779.46558232, 'timestamp': '2025-09-10 02:29:35.352759', 'step': 6468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:35.384222', 'step': 6468, 'epoch': 3} {'type': 'loss', 'content': 0.0009330078610219061, 'timestamp': '2025-09-10 02:29:35.388293', 'step': 6469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:35.419665', 'step': 6469, 'epoch': 3} {'type': 'loss', 'content': 0.010204659774899483, 'timestamp': '2025-09-10 02:29:35.426735', 'step': 6470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:35.459078', 'step': 6470, 'epoch': 3} {'type': 'loss', 'content': 0.00023531516490038484, 'timestamp': '2025-09-10 02:29:35.465701', 'step': 6471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:35.503012', 'step': 6471, 'epoch': 3} {'type': 'loss', 'content': 0.0004967449931427836, 'timestamp': '2025-09-10 02:29:35.528638', 'step': 6472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:35.568261', 'step': 6472, 'epoch': 3} {'type': 'loss', 'content': 0.0005900642718188465, 'timestamp': '2025-09-10 02:29:35.572624', 'step': 6473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:35.612783', 'step': 6473, 'epoch': 3} {'type': 'loss', 'content': 0.0001301489828620106, 'timestamp': '2025-09-10 02:29:35.623386', 'step': 6474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:35.658385', 'step': 6474, 'epoch': 3} {'type': 'loss', 'content': 0.0002881655527744442, 'timestamp': '2025-09-10 02:29:35.663428', 'step': 6475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:35.701857', 'step': 6475, 'epoch': 3} {'type': 'loss', 'content': 0.00041221058927476406, 'timestamp': '2025-09-10 02:29:35.734473', 'step': 6476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:35.780821', 'step': 6476, 'epoch': 3} {'type': 'loss', 'content': 0.0002252735139336437, 'timestamp': '2025-09-10 02:29:35.785876', 'step': 6477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:35.820594', 'step': 6477, 'epoch': 3} {'type': 'loss', 'content': 0.00011001846723956987, 'timestamp': '2025-09-10 02:29:35.830222', 'step': 6478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:35.862077', 'step': 6478, 'epoch': 3} {'type': 'loss', 'content': 0.0007892417488619685, 'timestamp': '2025-09-10 02:29:35.865799', 'step': 6479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:35.898683', 'step': 6479, 'epoch': 3} {'type': 'loss', 'content': 0.009268686175346375, 'timestamp': '2025-09-10 02:29:35.926101', 'step': 6480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:35.961547', 'step': 6480, 'epoch': 3} {'type': 'loss', 'content': 0.002452400978654623, 'timestamp': '2025-09-10 02:29:35.969221', 'step': 6481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:36.000580', 'step': 6481, 'epoch': 3} {'type': 'loss', 'content': 0.00030889807385392487, 'timestamp': '2025-09-10 02:29:36.005020', 'step': 6482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:36.045471', 'step': 6482, 'epoch': 3} {'type': 'loss', 'content': 0.00015111747779883444, 'timestamp': '2025-09-10 02:29:36.052698', 'step': 6483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:36.091401', 'step': 6483, 'epoch': 3} {'type': 'loss', 'content': 0.000214752959436737, 'timestamp': '2025-09-10 02:29:36.118935', 'step': 6484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:36.151773', 'step': 6484, 'epoch': 3} {'type': 'loss', 'content': 0.0003622962685767561, 'timestamp': '2025-09-10 02:29:36.156467', 'step': 6485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:36.189541', 'step': 6485, 'epoch': 3} {'type': 'loss', 'content': 0.00042898583342321217, 'timestamp': '2025-09-10 02:29:36.196999', 'step': 6486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:36.232058', 'step': 6486, 'epoch': 3} {'type': 'loss', 'content': 0.0003162114298902452, 'timestamp': '2025-09-10 02:29:36.244003', 'step': 6487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:36.277950', 'step': 6487, 'epoch': 3} {'type': 'loss', 'content': 0.0003182740474585444, 'timestamp': '2025-09-10 02:29:36.305280', 'step': 6488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:36.339842', 'step': 6488, 'epoch': 3} {'type': 'loss', 'content': 0.00012332449841778725, 'timestamp': '2025-09-10 02:29:36.345057', 'step': 6489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:36.384063', 'step': 6489, 'epoch': 3} {'type': 'loss', 'content': 0.00016331372899003327, 'timestamp': '2025-09-10 02:29:36.390704', 'step': 6490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:29:36.427948', 'step': 6490, 'epoch': 3} {'type': 'loss', 'content': 0.0025508024264127016, 'timestamp': '2025-09-10 02:29:36.441375', 'step': 6491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:36.479851', 'step': 6491, 'epoch': 3} {'type': 'loss', 'content': 0.0007555651245638728, 'timestamp': '2025-09-10 02:29:36.511641', 'step': 6492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:36.546508', 'step': 6492, 'epoch': 3} {'type': 'loss', 'content': 0.0002513106446713209, 'timestamp': '2025-09-10 02:29:36.550495', 'step': 6493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:36.590373', 'step': 6493, 'epoch': 3} {'type': 'loss', 'content': 0.00017626323096919805, 'timestamp': '2025-09-10 02:29:36.601330', 'step': 6494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:29:36.643268', 'step': 6494, 'epoch': 3} {'type': 'loss', 'content': 0.0001794335839804262, 'timestamp': '2025-09-10 02:29:36.657290', 'step': 6495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:36.689244', 'step': 6495, 'epoch': 3} {'type': 'loss', 'content': 7.488606206607074e-05, 'timestamp': '2025-09-10 02:29:36.720858', 'step': 6496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:36.751619', 'step': 6496, 'epoch': 3} {'type': 'loss', 'content': 0.0005462738336063921, 'timestamp': '2025-09-10 02:29:36.753944', 'step': 6497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:29:36.792379', 'step': 6497, 'epoch': 3} {'type': 'loss', 'content': 0.00012696681369561702, 'timestamp': '2025-09-10 02:29:36.807955', 'step': 6498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:36.841494', 'step': 6498, 'epoch': 3} {'type': 'loss', 'content': 0.0005157067789696157, 'timestamp': '2025-09-10 02:29:36.852298', 'step': 6499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:36.883819', 'step': 6499, 'epoch': 3} {'type': 'loss', 'content': 0.0005326425889506936, 'timestamp': '2025-09-10 02:29:36.907923', 'step': 6500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 6500', 'timestamp': '2025-09-10 02:29:41.632309', 'step': 6500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:41.664361', 'step': 6500, 'epoch': 3} {'type': 'loss', 'content': 0.00022441011969931424, 'timestamp': '2025-09-10 02:29:41.668364', 'step': 6501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:41.699351', 'step': 6501, 'epoch': 3} {'type': 'loss', 'content': 0.0002523001458030194, 'timestamp': '2025-09-10 02:29:41.701189', 'step': 6502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:41.732711', 'step': 6502, 'epoch': 3} {'type': 'loss', 'content': 3.853170346701518e-05, 'timestamp': '2025-09-10 02:29:41.739497', 'step': 6503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:41.769971', 'step': 6503, 'epoch': 3} {'type': 'loss', 'content': 0.0001199183170683682, 'timestamp': '2025-09-10 02:29:41.794729', 'step': 6504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:29:41.831803', 'step': 6504, 'epoch': 3} {'type': 'loss', 'content': 0.0005333385779522359, 'timestamp': '2025-09-10 02:29:41.846885', 'step': 6505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:41.877696', 'step': 6505, 'epoch': 3} {'type': 'loss', 'content': 0.0003356721135787666, 'timestamp': '2025-09-10 02:29:41.887991', 'step': 6506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:29:41.930725', 'step': 6506, 'epoch': 3} {'type': 'loss', 'content': 0.0004762968164868653, 'timestamp': '2025-09-10 02:29:41.944737', 'step': 6507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:41.977144', 'step': 6507, 'epoch': 3} {'type': 'loss', 'content': 0.0037521845661103725, 'timestamp': '2025-09-10 02:29:42.005092', 'step': 6508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:42.039853', 'step': 6508, 'epoch': 3} {'type': 'loss', 'content': 0.0003358405956532806, 'timestamp': '2025-09-10 02:29:42.043031', 'step': 6509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:42.078894', 'step': 6509, 'epoch': 3} {'type': 'loss', 'content': 0.00010202966223005205, 'timestamp': '2025-09-10 02:29:42.081544', 'step': 6510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:42.112909', 'step': 6510, 'epoch': 3} {'type': 'loss', 'content': 5.8475307014305145e-05, 'timestamp': '2025-09-10 02:29:42.117315', 'step': 6511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:42.148295', 'step': 6511, 'epoch': 3} {'type': 'loss', 'content': 0.0015812184428796172, 'timestamp': '2025-09-10 02:29:42.176703', 'step': 6512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:42.209147', 'step': 6512, 'epoch': 3} {'type': 'loss', 'content': 0.000630614347755909, 'timestamp': '2025-09-10 02:29:42.213683', 'step': 6513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:42.246530', 'step': 6513, 'epoch': 3} {'type': 'loss', 'content': 0.002638591453433037, 'timestamp': '2025-09-10 02:29:42.258549', 'step': 6514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:29:42.292876', 'step': 6514, 'epoch': 3} {'type': 'loss', 'content': 0.0018498777644708753, 'timestamp': '2025-09-10 02:29:42.306207', 'step': 6515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:29:42.345000', 'step': 6515, 'epoch': 3} {'type': 'loss', 'content': 0.0014997952384874225, 'timestamp': '2025-09-10 02:29:42.379266', 'step': 6516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:42.423451', 'step': 6516, 'epoch': 3} {'type': 'loss', 'content': 5.7881530665326864e-05, 'timestamp': '2025-09-10 02:29:42.426209', 'step': 6517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:42.460619', 'step': 6517, 'epoch': 3} {'type': 'loss', 'content': 0.0023194043897092342, 'timestamp': '2025-09-10 02:29:42.464969', 'step': 6518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:42.495158', 'step': 6518, 'epoch': 3} {'type': 'loss', 'content': 0.0002513010986149311, 'timestamp': '2025-09-10 02:29:42.499820', 'step': 6519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:42.530931', 'step': 6519, 'epoch': 3} {'type': 'loss', 'content': 9.767297888174653e-05, 'timestamp': '2025-09-10 02:29:42.559204', 'step': 6520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:29:42.592773', 'step': 6520, 'epoch': 3} {'type': 'loss', 'content': 0.0012455906253308058, 'timestamp': '2025-09-10 02:29:42.605309', 'step': 6521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:42.638972', 'step': 6521, 'epoch': 3} {'type': 'loss', 'content': 0.00012123944179620594, 'timestamp': '2025-09-10 02:29:42.644016', 'step': 6522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:42.700829', 'step': 6522, 'epoch': 3} {'type': 'loss', 'content': 0.00010264220327371731, 'timestamp': '2025-09-10 02:29:42.711735', 'step': 6523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:42.743875', 'step': 6523, 'epoch': 3} {'type': 'loss', 'content': 0.00013027484237682074, 'timestamp': '2025-09-10 02:29:42.774849', 'step': 6524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:42.829251', 'step': 6524, 'epoch': 3} {'type': 'loss', 'content': 6.051839955034666e-05, 'timestamp': '2025-09-10 02:29:42.830918', 'step': 6525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:42.863231', 'step': 6525, 'epoch': 3} {'type': 'loss', 'content': 0.00017694670532364398, 'timestamp': '2025-09-10 02:29:42.875174', 'step': 6526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:42.911744', 'step': 6526, 'epoch': 3} {'type': 'loss', 'content': 0.0001635013904888183, 'timestamp': '2025-09-10 02:29:42.918386', 'step': 6527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:42.951115', 'step': 6527, 'epoch': 3} {'type': 'loss', 'content': 0.000155866306158714, 'timestamp': '2025-09-10 02:29:42.981872', 'step': 6528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:43.018101', 'step': 6528, 'epoch': 3} {'type': 'loss', 'content': 0.00020132049394305795, 'timestamp': '2025-09-10 02:29:43.027472', 'step': 6529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:43.059876', 'step': 6529, 'epoch': 3} {'type': 'loss', 'content': 0.0001669298653723672, 'timestamp': '2025-09-10 02:29:43.064357', 'step': 6530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:29:43.106956', 'step': 6530, 'epoch': 3} {'type': 'loss', 'content': 9.987391968024895e-05, 'timestamp': '2025-09-10 02:29:43.120572', 'step': 6531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:43.161837', 'step': 6531, 'epoch': 3} {'type': 'loss', 'content': 6.64821855025366e-05, 'timestamp': '2025-09-10 02:29:43.192643', 'step': 6532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:43.233736', 'step': 6532, 'epoch': 3} {'type': 'loss', 'content': 0.00035039763315580785, 'timestamp': '2025-09-10 02:29:43.238128', 'step': 6533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:43.275737', 'step': 6533, 'epoch': 3} {'type': 'loss', 'content': 0.00019631556642707437, 'timestamp': '2025-09-10 02:29:43.286331', 'step': 6534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:43.321424', 'step': 6534, 'epoch': 3} {'type': 'loss', 'content': 0.004502573050558567, 'timestamp': '2025-09-10 02:29:43.328363', 'step': 6535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:29:43.367093', 'step': 6535, 'epoch': 3} {'type': 'loss', 'content': 8.936825179262087e-05, 'timestamp': '2025-09-10 02:29:43.401939', 'step': 6536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:43.444500', 'step': 6536, 'epoch': 3} {'type': 'loss', 'content': 0.0012994735734537244, 'timestamp': '2025-09-10 02:29:43.450004', 'step': 6537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:43.488581', 'step': 6537, 'epoch': 3} {'type': 'loss', 'content': 0.00024626238155178726, 'timestamp': '2025-09-10 02:29:43.500954', 'step': 6538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:43.537888', 'step': 6538, 'epoch': 3} {'type': 'loss', 'content': 0.0011256147408857942, 'timestamp': '2025-09-10 02:29:43.544647', 'step': 6539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:43.582021', 'step': 6539, 'epoch': 3} {'type': 'loss', 'content': 0.0003471332311164588, 'timestamp': '2025-09-10 02:29:43.610260', 'step': 6540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:43.648185', 'step': 6540, 'epoch': 3} {'type': 'loss', 'content': 0.0005624201148748398, 'timestamp': '2025-09-10 02:29:43.652530', 'step': 6541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:43.685826', 'step': 6541, 'epoch': 3} {'type': 'loss', 'content': 0.000520360074006021, 'timestamp': '2025-09-10 02:29:43.688755', 'step': 6542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:43.728439', 'step': 6542, 'epoch': 3} {'type': 'loss', 'content': 0.0008386308327317238, 'timestamp': '2025-09-10 02:29:43.735366', 'step': 6543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:43.775439', 'step': 6543, 'epoch': 3} {'type': 'loss', 'content': 0.0001823129568947479, 'timestamp': '2025-09-10 02:29:43.806352', 'step': 6544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:43.839287', 'step': 6544, 'epoch': 3} {'type': 'loss', 'content': 3.192834265064448e-05, 'timestamp': '2025-09-10 02:29:43.843723', 'step': 6545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:43.875102', 'step': 6545, 'epoch': 3} {'type': 'loss', 'content': 0.0004066001274622977, 'timestamp': '2025-09-10 02:29:43.879468', 'step': 6546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:43.911348', 'step': 6546, 'epoch': 3} {'type': 'loss', 'content': 0.005676894914358854, 'timestamp': '2025-09-10 02:29:43.915314', 'step': 6547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:43.949276', 'step': 6547, 'epoch': 3} {'type': 'loss', 'content': 0.0017382523510605097, 'timestamp': '2025-09-10 02:29:43.977822', 'step': 6548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:44.008971', 'step': 6548, 'epoch': 3} {'type': 'loss', 'content': 0.00020217923156451434, 'timestamp': '2025-09-10 02:29:44.014497', 'step': 6549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:44.047133', 'step': 6549, 'epoch': 3} {'type': 'loss', 'content': 0.0010007356759160757, 'timestamp': '2025-09-10 02:29:44.057103', 'step': 6550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:44.093005', 'step': 6550, 'epoch': 3} {'type': 'loss', 'content': 0.0002772933221422136, 'timestamp': '2025-09-10 02:29:44.100364', 'step': 6551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:44.132702', 'step': 6551, 'epoch': 3} {'type': 'loss', 'content': 8.65510301082395e-05, 'timestamp': '2025-09-10 02:29:44.165536', 'step': 6552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:44.199627', 'step': 6552, 'epoch': 3} {'type': 'loss', 'content': 0.00012201262870803475, 'timestamp': '2025-09-10 02:29:44.207311', 'step': 6553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:44.241883', 'step': 6553, 'epoch': 3} {'type': 'loss', 'content': 2.7427620807429776e-05, 'timestamp': '2025-09-10 02:29:44.253642', 'step': 6554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:44.289381', 'step': 6554, 'epoch': 3} {'type': 'loss', 'content': 0.0004760746378451586, 'timestamp': '2025-09-10 02:29:44.293433', 'step': 6555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:44.326167', 'step': 6555, 'epoch': 3} {'type': 'loss', 'content': 5.913670247537084e-05, 'timestamp': '2025-09-10 02:29:44.354407', 'step': 6556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:44.384952', 'step': 6556, 'epoch': 3} {'type': 'loss', 'content': 0.0003855906252283603, 'timestamp': '2025-09-10 02:29:44.390366', 'step': 6557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:44.421919', 'step': 6557, 'epoch': 3} {'type': 'loss', 'content': 0.0006527866353280842, 'timestamp': '2025-09-10 02:29:44.429692', 'step': 6558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:44.460396', 'step': 6558, 'epoch': 3} {'type': 'loss', 'content': 0.0004126446438021958, 'timestamp': '2025-09-10 02:29:44.467105', 'step': 6559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:44.499030', 'step': 6559, 'epoch': 3} {'type': 'loss', 'content': 0.00010248164471704513, 'timestamp': '2025-09-10 02:29:44.527617', 'step': 6560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:44.582600', 'step': 6560, 'epoch': 3} {'type': 'loss', 'content': 2.3595810489496216e-05, 'timestamp': '2025-09-10 02:29:44.590845', 'step': 6561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:29:44.647250', 'step': 6561, 'epoch': 3} {'type': 'loss', 'content': 3.688699143822305e-05, 'timestamp': '2025-09-10 02:29:44.660980', 'step': 6562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:44.699999', 'step': 6562, 'epoch': 3} {'type': 'loss', 'content': 0.00012046539632137865, 'timestamp': '2025-09-10 02:29:44.706900', 'step': 6563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:44.742152', 'step': 6563, 'epoch': 3} {'type': 'loss', 'content': 9.20097081689164e-05, 'timestamp': '2025-09-10 02:29:44.778567', 'step': 6564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:44.867801', 'step': 6564, 'epoch': 3} {'type': 'loss', 'content': 7.921749056549743e-05, 'timestamp': '2025-09-10 02:29:44.872821', 'step': 6565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:29:44.939179', 'step': 6565, 'epoch': 3} {'type': 'loss', 'content': 0.0032475763000547886, 'timestamp': '2025-09-10 02:29:44.955024', 'step': 6566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 848], 'flops': 25154260214720}, 'timestamp': '2025-09-10 02:29:45.056353', 'step': 6566, 'epoch': 3} {'type': 'loss', 'content': 8.683669875608757e-05, 'timestamp': '2025-09-10 02:29:45.085999', 'step': 6567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:45.121927', 'step': 6567, 'epoch': 3} {'type': 'loss', 'content': 7.070793799357489e-05, 'timestamp': '2025-09-10 02:29:45.155401', 'step': 6568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:45.186619', 'step': 6568, 'epoch': 3} {'type': 'loss', 'content': 0.0010982885723933578, 'timestamp': '2025-09-10 02:29:45.189689', 'step': 6569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:45.223202', 'step': 6569, 'epoch': 3} {'type': 'loss', 'content': 7.731281948508695e-05, 'timestamp': '2025-09-10 02:29:45.227551', 'step': 6570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:45.259253', 'step': 6570, 'epoch': 3} {'type': 'loss', 'content': 0.00015924021136015654, 'timestamp': '2025-09-10 02:29:45.269427', 'step': 6571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:45.306151', 'step': 6571, 'epoch': 3} {'type': 'loss', 'content': 0.00023259581939782947, 'timestamp': '2025-09-10 02:29:45.333773', 'step': 6572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:29:45.367653', 'step': 6572, 'epoch': 3} {'type': 'loss', 'content': 0.0009659113129600883, 'timestamp': '2025-09-10 02:29:45.380672', 'step': 6573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:29:45.416456', 'step': 6573, 'epoch': 3} {'type': 'loss', 'content': 0.0002564275055192411, 'timestamp': '2025-09-10 02:29:45.430411', 'step': 6574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:45.462075', 'step': 6574, 'epoch': 3} {'type': 'loss', 'content': 8.740082557778805e-05, 'timestamp': '2025-09-10 02:29:45.466099', 'step': 6575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 15187581968384}, 'timestamp': '2025-09-10 02:29:45.509626', 'step': 6575, 'epoch': 3} {'type': 'loss', 'content': 0.00016587102436460555, 'timestamp': '2025-09-10 02:29:45.548209', 'step': 6576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:29:45.591561', 'step': 6576, 'epoch': 3} {'type': 'loss', 'content': 0.0008845412521623075, 'timestamp': '2025-09-10 02:29:45.607247', 'step': 6577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:45.645434', 'step': 6577, 'epoch': 3} {'type': 'loss', 'content': 0.017135100439190865, 'timestamp': '2025-09-10 02:29:45.649484', 'step': 6578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:45.685410', 'step': 6578, 'epoch': 3} {'type': 'loss', 'content': 0.005552711896598339, 'timestamp': '2025-09-10 02:29:45.697410', 'step': 6579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:45.730471', 'step': 6579, 'epoch': 3} {'type': 'loss', 'content': 5.742481516790576e-05, 'timestamp': '2025-09-10 02:29:45.766625', 'step': 6580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:45.802511', 'step': 6580, 'epoch': 3} {'type': 'loss', 'content': 0.00011518421524669975, 'timestamp': '2025-09-10 02:29:45.810931', 'step': 6581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:45.844646', 'step': 6581, 'epoch': 3} {'type': 'loss', 'content': 0.02741926722228527, 'timestamp': '2025-09-10 02:29:45.855494', 'step': 6582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:45.888579', 'step': 6582, 'epoch': 3} {'type': 'loss', 'content': 0.0002638068108353764, 'timestamp': '2025-09-10 02:29:45.895389', 'step': 6583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:29:45.934941', 'step': 6583, 'epoch': 3} {'type': 'loss', 'content': 7.82187344157137e-05, 'timestamp': '2025-09-10 02:29:45.969171', 'step': 6584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:46.009699', 'step': 6584, 'epoch': 3} {'type': 'loss', 'content': 0.00043393290252424777, 'timestamp': '2025-09-10 02:29:46.014232', 'step': 6585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:46.054569', 'step': 6585, 'epoch': 3} {'type': 'loss', 'content': 0.0005896832444705069, 'timestamp': '2025-09-10 02:29:46.063670', 'step': 6586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:46.097949', 'step': 6586, 'epoch': 3} {'type': 'loss', 'content': 2.4004228180274367e-05, 'timestamp': '2025-09-10 02:29:46.104779', 'step': 6587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:46.143132', 'step': 6587, 'epoch': 3} {'type': 'loss', 'content': 4.16260190831963e-05, 'timestamp': '2025-09-10 02:29:46.167051', 'step': 6588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:29:46.202964', 'step': 6588, 'epoch': 3} {'type': 'loss', 'content': 0.08385879546403885, 'timestamp': '2025-09-10 02:29:46.218140', 'step': 6589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:46.253903', 'step': 6589, 'epoch': 3} {'type': 'loss', 'content': 4.120770245208405e-05, 'timestamp': '2025-09-10 02:29:46.258184', 'step': 6590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:46.292173', 'step': 6590, 'epoch': 3} {'type': 'loss', 'content': 0.0004548307042568922, 'timestamp': '2025-09-10 02:29:46.304343', 'step': 6591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:46.337290', 'step': 6591, 'epoch': 3} {'type': 'loss', 'content': 0.0008054524078033864, 'timestamp': '2025-09-10 02:29:46.367624', 'step': 6592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:46.403516', 'step': 6592, 'epoch': 3} {'type': 'loss', 'content': 0.000291215896140784, 'timestamp': '2025-09-10 02:29:46.407847', 'step': 6593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:29:46.446194', 'step': 6593, 'epoch': 3} {'type': 'loss', 'content': 2.139638854714576e-05, 'timestamp': '2025-09-10 02:29:46.461869', 'step': 6594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:46.494685', 'step': 6594, 'epoch': 3} {'type': 'loss', 'content': 2.8166157790110447e-05, 'timestamp': '2025-09-10 02:29:46.502312', 'step': 6595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:46.533636', 'step': 6595, 'epoch': 3} {'type': 'loss', 'content': 0.0057443794794380665, 'timestamp': '2025-09-10 02:29:46.561742', 'step': 6596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:29:46.595237', 'step': 6596, 'epoch': 3} {'type': 'loss', 'content': 9.872858208836988e-05, 'timestamp': '2025-09-10 02:29:46.608219', 'step': 6597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:46.639652', 'step': 6597, 'epoch': 3} {'type': 'loss', 'content': 0.00032129278406500816, 'timestamp': '2025-09-10 02:29:46.646828', 'step': 6598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:46.678399', 'step': 6598, 'epoch': 3} {'type': 'loss', 'content': 5.182164386496879e-05, 'timestamp': '2025-09-10 02:29:46.685105', 'step': 6599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:46.720441', 'step': 6599, 'epoch': 3} {'type': 'loss', 'content': 0.0004248176119290292, 'timestamp': '2025-09-10 02:29:46.753685', 'step': 6600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:46.793074', 'step': 6600, 'epoch': 3} {'type': 'loss', 'content': 5.2954368584323674e-05, 'timestamp': '2025-09-10 02:29:46.797228', 'step': 6601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:46.828902', 'step': 6601, 'epoch': 3} {'type': 'loss', 'content': 6.363394641084597e-05, 'timestamp': '2025-09-10 02:29:46.835986', 'step': 6602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:46.871029', 'step': 6602, 'epoch': 3} {'type': 'loss', 'content': 0.0003344352007843554, 'timestamp': '2025-09-10 02:29:46.878236', 'step': 6603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:46.912555', 'step': 6603, 'epoch': 3} {'type': 'loss', 'content': 0.0003345024597365409, 'timestamp': '2025-09-10 02:29:46.940194', 'step': 6604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:46.975062', 'step': 6604, 'epoch': 3} {'type': 'loss', 'content': 0.00013073600712232292, 'timestamp': '2025-09-10 02:29:46.980181', 'step': 6605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:47.011831', 'step': 6605, 'epoch': 3} {'type': 'loss', 'content': 0.00012158307799836621, 'timestamp': '2025-09-10 02:29:47.018238', 'step': 6606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:47.050475', 'step': 6606, 'epoch': 3} {'type': 'loss', 'content': 0.00014575115346815437, 'timestamp': '2025-09-10 02:29:47.059974', 'step': 6607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 19459015502528}, 'timestamp': '2025-09-10 02:29:47.115534', 'step': 6607, 'epoch': 3} {'type': 'loss', 'content': 0.0006869042408652604, 'timestamp': '2025-09-10 02:29:47.159829', 'step': 6608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:47.192433', 'step': 6608, 'epoch': 3} {'type': 'loss', 'content': 0.0005232971161603928, 'timestamp': '2025-09-10 02:29:47.200103', 'step': 6609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:47.231788', 'step': 6609, 'epoch': 3} {'type': 'loss', 'content': 0.0002385809930274263, 'timestamp': '2025-09-10 02:29:47.238211', 'step': 6610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:29:47.278395', 'step': 6610, 'epoch': 3} {'type': 'loss', 'content': 0.00031182175735011697, 'timestamp': '2025-09-10 02:29:47.294275', 'step': 6611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:47.325939', 'step': 6611, 'epoch': 3} {'type': 'loss', 'content': 9.482206223765388e-05, 'timestamp': '2025-09-10 02:29:47.353366', 'step': 6612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:47.387214', 'step': 6612, 'epoch': 3} {'type': 'loss', 'content': 0.0003503487096168101, 'timestamp': '2025-09-10 02:29:47.392229', 'step': 6613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:47.423378', 'step': 6613, 'epoch': 3} {'type': 'loss', 'content': 0.00023075289209373295, 'timestamp': '2025-09-10 02:29:47.435195', 'step': 6614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:29:47.470337', 'step': 6614, 'epoch': 3} {'type': 'loss', 'content': 7.644097786396742e-05, 'timestamp': '2025-09-10 02:29:47.484129', 'step': 6615, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:29:57.697352', 'step': 6615, 'epoch': 3} {'type': 'pplx', 'content': 23155743.853774708, 'timestamp': '2025-09-10 02:29:57.700483', 'step': 6615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:57.731870', 'step': 6615, 'epoch': 3} {'type': 'loss', 'content': 0.00013089847925584763, 'timestamp': '2025-09-10 02:29:57.763239', 'step': 6616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:57.795019', 'step': 6616, 'epoch': 3} {'type': 'loss', 'content': 0.008317952044308186, 'timestamp': '2025-09-10 02:29:57.802667', 'step': 6617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:29:57.836916', 'step': 6617, 'epoch': 3} {'type': 'loss', 'content': 0.00014241410826798528, 'timestamp': '2025-09-10 02:29:57.850624', 'step': 6618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:57.882574', 'step': 6618, 'epoch': 3} {'type': 'loss', 'content': 0.0004595229693222791, 'timestamp': '2025-09-10 02:29:57.890144', 'step': 6619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:57.920736', 'step': 6619, 'epoch': 3} {'type': 'loss', 'content': 0.0002367516717640683, 'timestamp': '2025-09-10 02:29:57.945896', 'step': 6620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:57.976938', 'step': 6620, 'epoch': 3} {'type': 'loss', 'content': 8.231966057792306e-05, 'timestamp': '2025-09-10 02:29:57.987433', 'step': 6621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:58.018056', 'step': 6621, 'epoch': 3} {'type': 'loss', 'content': 0.00013666613085661083, 'timestamp': '2025-09-10 02:29:58.028280', 'step': 6622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:58.058965', 'step': 6622, 'epoch': 3} {'type': 'loss', 'content': 0.00022755752434022725, 'timestamp': '2025-09-10 02:29:58.061395', 'step': 6623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:58.092174', 'step': 6623, 'epoch': 3} {'type': 'loss', 'content': 4.438480391399935e-05, 'timestamp': '2025-09-10 02:29:58.120015', 'step': 6624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:58.150543', 'step': 6624, 'epoch': 3} {'type': 'loss', 'content': 0.00011097739479737356, 'timestamp': '2025-09-10 02:29:58.158399', 'step': 6625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:29:58.191879', 'step': 6625, 'epoch': 3} {'type': 'loss', 'content': 6.087979636504315e-05, 'timestamp': '2025-09-10 02:29:58.205310', 'step': 6626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:58.236157', 'step': 6626, 'epoch': 3} {'type': 'loss', 'content': 0.0001310189691139385, 'timestamp': '2025-09-10 02:29:58.243858', 'step': 6627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:58.273884', 'step': 6627, 'epoch': 3} {'type': 'loss', 'content': 0.0006077897851355374, 'timestamp': '2025-09-10 02:29:58.300002', 'step': 6628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:58.330536', 'step': 6628, 'epoch': 3} {'type': 'loss', 'content': 0.00032031405135057867, 'timestamp': '2025-09-10 02:29:58.335086', 'step': 6629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:29:58.369661', 'step': 6629, 'epoch': 3} {'type': 'loss', 'content': 0.00019072220311500132, 'timestamp': '2025-09-10 02:29:58.383352', 'step': 6630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:58.416056', 'step': 6630, 'epoch': 3} {'type': 'loss', 'content': 0.00016121887892950326, 'timestamp': '2025-09-10 02:29:58.420014', 'step': 6631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:58.450592', 'step': 6631, 'epoch': 3} {'type': 'loss', 'content': 0.00014108339382801205, 'timestamp': '2025-09-10 02:29:58.478324', 'step': 6632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:58.510726', 'step': 6632, 'epoch': 3} {'type': 'loss', 'content': 0.00026426606927998364, 'timestamp': '2025-09-10 02:29:58.518657', 'step': 6633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:58.549418', 'step': 6633, 'epoch': 3} {'type': 'loss', 'content': 0.00017428163846489042, 'timestamp': '2025-09-10 02:29:58.556254', 'step': 6634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:58.587457', 'step': 6634, 'epoch': 3} {'type': 'loss', 'content': 9.75916045717895e-05, 'timestamp': '2025-09-10 02:29:58.595369', 'step': 6635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:29:58.630012', 'step': 6635, 'epoch': 3} {'type': 'loss', 'content': 0.0006233084131963551, 'timestamp': '2025-09-10 02:29:58.664915', 'step': 6636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:58.695685', 'step': 6636, 'epoch': 3} {'type': 'loss', 'content': 0.04499921575188637, 'timestamp': '2025-09-10 02:29:58.700709', 'step': 6637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:58.731676', 'step': 6637, 'epoch': 3} {'type': 'loss', 'content': 0.0001879000337794423, 'timestamp': '2025-09-10 02:29:58.734198', 'step': 6638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:58.765793', 'step': 6638, 'epoch': 3} {'type': 'loss', 'content': 0.00037380700814537704, 'timestamp': '2025-09-10 02:29:58.777909', 'step': 6639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:58.810035', 'step': 6639, 'epoch': 3} {'type': 'loss', 'content': 0.000758981506805867, 'timestamp': '2025-09-10 02:29:58.835096', 'step': 6640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:58.866403', 'step': 6640, 'epoch': 3} {'type': 'loss', 'content': 0.00025593198370188475, 'timestamp': '2025-09-10 02:29:58.869376', 'step': 6641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:58.902071', 'step': 6641, 'epoch': 3} {'type': 'loss', 'content': 0.00043907135841436684, 'timestamp': '2025-09-10 02:29:58.906026', 'step': 6642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:58.939048', 'step': 6642, 'epoch': 3} {'type': 'loss', 'content': 0.0006986635853536427, 'timestamp': '2025-09-10 02:29:58.951100', 'step': 6643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:29:58.982239', 'step': 6643, 'epoch': 3} {'type': 'loss', 'content': 0.00012294725456740707, 'timestamp': '2025-09-10 02:29:59.015713', 'step': 6644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:59.048251', 'step': 6644, 'epoch': 3} {'type': 'loss', 'content': 0.00016030111873988062, 'timestamp': '2025-09-10 02:29:59.056301', 'step': 6645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:29:59.087486', 'step': 6645, 'epoch': 3} {'type': 'loss', 'content': 0.0019082374637946486, 'timestamp': '2025-09-10 02:29:59.098455', 'step': 6646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:59.130766', 'step': 6646, 'epoch': 3} {'type': 'loss', 'content': 0.003217429621145129, 'timestamp': '2025-09-10 02:29:59.141083', 'step': 6647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:59.171745', 'step': 6647, 'epoch': 3} {'type': 'loss', 'content': 0.0001194212309201248, 'timestamp': '2025-09-10 02:29:59.202813', 'step': 6648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:59.233711', 'step': 6648, 'epoch': 3} {'type': 'loss', 'content': 0.00015604333020746708, 'timestamp': '2025-09-10 02:29:59.238488', 'step': 6649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:59.270048', 'step': 6649, 'epoch': 3} {'type': 'loss', 'content': 0.0003292102483101189, 'timestamp': '2025-09-10 02:29:59.277806', 'step': 6650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:59.311348', 'step': 6650, 'epoch': 3} {'type': 'loss', 'content': 0.0001702476729406044, 'timestamp': '2025-09-10 02:29:59.323584', 'step': 6651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:59.353991', 'step': 6651, 'epoch': 3} {'type': 'loss', 'content': 8.551568316761404e-05, 'timestamp': '2025-09-10 02:29:59.387131', 'step': 6652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:59.417958', 'step': 6652, 'epoch': 3} {'type': 'loss', 'content': 0.0008532029460184276, 'timestamp': '2025-09-10 02:29:59.422673', 'step': 6653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:59.454139', 'step': 6653, 'epoch': 3} {'type': 'loss', 'content': 0.002605307847261429, 'timestamp': '2025-09-10 02:29:59.461242', 'step': 6654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:59.492600', 'step': 6654, 'epoch': 3} {'type': 'loss', 'content': 7.880874909460545e-05, 'timestamp': '2025-09-10 02:29:59.499552', 'step': 6655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:29:59.530925', 'step': 6655, 'epoch': 3} {'type': 'loss', 'content': 0.0003353517968207598, 'timestamp': '2025-09-10 02:29:59.558839', 'step': 6656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:59.590568', 'step': 6656, 'epoch': 3} {'type': 'loss', 'content': 0.00013173728075344115, 'timestamp': '2025-09-10 02:29:59.595098', 'step': 6657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:59.625524', 'step': 6657, 'epoch': 3} {'type': 'loss', 'content': 0.0001572458859300241, 'timestamp': '2025-09-10 02:29:59.635684', 'step': 6658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:59.667141', 'step': 6658, 'epoch': 3} {'type': 'loss', 'content': 0.0001322474709013477, 'timestamp': '2025-09-10 02:29:59.674006', 'step': 6659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:29:59.705246', 'step': 6659, 'epoch': 3} {'type': 'loss', 'content': 0.0003668780846055597, 'timestamp': '2025-09-10 02:29:59.736437', 'step': 6660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:29:59.767777', 'step': 6660, 'epoch': 3} {'type': 'loss', 'content': 0.00014766550157219172, 'timestamp': '2025-09-10 02:29:59.777501', 'step': 6661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:29:59.808844', 'step': 6661, 'epoch': 3} {'type': 'loss', 'content': 0.00010263031435897574, 'timestamp': '2025-09-10 02:29:59.816341', 'step': 6662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:59.846707', 'step': 6662, 'epoch': 3} {'type': 'loss', 'content': 0.00016308830527123064, 'timestamp': '2025-09-10 02:29:59.850690', 'step': 6663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:59.880950', 'step': 6663, 'epoch': 3} {'type': 'loss', 'content': 0.00017855261103250086, 'timestamp': '2025-09-10 02:29:59.908755', 'step': 6664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:59.940347', 'step': 6664, 'epoch': 3} {'type': 'loss', 'content': 0.002598909428343177, 'timestamp': '2025-09-10 02:29:59.942693', 'step': 6665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:29:59.972688', 'step': 6665, 'epoch': 3} {'type': 'loss', 'content': 0.03273782879114151, 'timestamp': '2025-09-10 02:29:59.980415', 'step': 6666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:00.011943', 'step': 6666, 'epoch': 3} {'type': 'loss', 'content': 0.0009771647164598107, 'timestamp': '2025-09-10 02:30:00.022056', 'step': 6667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:00.053644', 'step': 6667, 'epoch': 3} {'type': 'loss', 'content': 0.0001550656888866797, 'timestamp': '2025-09-10 02:30:00.081954', 'step': 6668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:00.115747', 'step': 6668, 'epoch': 3} {'type': 'loss', 'content': 6.419160490622744e-05, 'timestamp': '2025-09-10 02:30:00.123685', 'step': 6669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:00.155047', 'step': 6669, 'epoch': 3} {'type': 'loss', 'content': 0.0010343643371015787, 'timestamp': '2025-09-10 02:30:00.162649', 'step': 6670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:30:00.197090', 'step': 6670, 'epoch': 3} {'type': 'loss', 'content': 0.0004840478941332549, 'timestamp': '2025-09-10 02:30:00.210903', 'step': 6671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:00.246281', 'step': 6671, 'epoch': 3} {'type': 'loss', 'content': 0.0007365471683442593, 'timestamp': '2025-09-10 02:30:00.280494', 'step': 6672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:00.312642', 'step': 6672, 'epoch': 3} {'type': 'loss', 'content': 0.00023997330572456121, 'timestamp': '2025-09-10 02:30:00.318091', 'step': 6673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:00.349431', 'step': 6673, 'epoch': 3} {'type': 'loss', 'content': 0.0003005491744261235, 'timestamp': '2025-09-10 02:30:00.361669', 'step': 6674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:00.392866', 'step': 6674, 'epoch': 3} {'type': 'loss', 'content': 0.0004848405660595745, 'timestamp': '2025-09-10 02:30:00.399901', 'step': 6675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:00.431202', 'step': 6675, 'epoch': 3} {'type': 'loss', 'content': 0.0005252750124782324, 'timestamp': '2025-09-10 02:30:00.459830', 'step': 6676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:00.491167', 'step': 6676, 'epoch': 3} {'type': 'loss', 'content': 0.00031378321000374854, 'timestamp': '2025-09-10 02:30:00.495509', 'step': 6677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:00.527111', 'step': 6677, 'epoch': 3} {'type': 'loss', 'content': 0.0002828229626175016, 'timestamp': '2025-09-10 02:30:00.537098', 'step': 6678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:00.568785', 'step': 6678, 'epoch': 3} {'type': 'loss', 'content': 0.00015684754180256277, 'timestamp': '2025-09-10 02:30:00.576183', 'step': 6679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:00.606885', 'step': 6679, 'epoch': 3} {'type': 'loss', 'content': 0.0003511524701025337, 'timestamp': '2025-09-10 02:30:00.630735', 'step': 6680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:30:00.662482', 'step': 6680, 'epoch': 3} {'type': 'loss', 'content': 0.00010571930761216208, 'timestamp': '2025-09-10 02:30:00.672718', 'step': 6681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:00.705496', 'step': 6681, 'epoch': 3} {'type': 'loss', 'content': 0.00010457936878083274, 'timestamp': '2025-09-10 02:30:00.709653', 'step': 6682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:00.740382', 'step': 6682, 'epoch': 3} {'type': 'loss', 'content': 0.00036585141788236797, 'timestamp': '2025-09-10 02:30:00.747032', 'step': 6683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:00.782833', 'step': 6683, 'epoch': 3} {'type': 'loss', 'content': 0.00012327669537626207, 'timestamp': '2025-09-10 02:30:00.810543', 'step': 6684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:00.841504', 'step': 6684, 'epoch': 3} {'type': 'loss', 'content': 0.00021850709163118154, 'timestamp': '2025-09-10 02:30:00.844142', 'step': 6685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:00.875302', 'step': 6685, 'epoch': 3} {'type': 'loss', 'content': 0.00018914879183284938, 'timestamp': '2025-09-10 02:30:00.887630', 'step': 6686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:00.918893', 'step': 6686, 'epoch': 3} {'type': 'loss', 'content': 0.0001374257553834468, 'timestamp': '2025-09-10 02:30:00.921422', 'step': 6687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:30:00.957894', 'step': 6687, 'epoch': 3} {'type': 'loss', 'content': 0.00024864732404239476, 'timestamp': '2025-09-10 02:30:00.992738', 'step': 6688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:01.024373', 'step': 6688, 'epoch': 3} {'type': 'loss', 'content': 0.009503054432570934, 'timestamp': '2025-09-10 02:30:01.028078', 'step': 6689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:01.063225', 'step': 6689, 'epoch': 3} {'type': 'loss', 'content': 0.0002550124190747738, 'timestamp': '2025-09-10 02:30:01.070513', 'step': 6690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:30:01.109691', 'step': 6690, 'epoch': 3} {'type': 'loss', 'content': 0.0004109316796530038, 'timestamp': '2025-09-10 02:30:01.125868', 'step': 6691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:01.157073', 'step': 6691, 'epoch': 3} {'type': 'loss', 'content': 0.00036797040957026184, 'timestamp': '2025-09-10 02:30:01.185883', 'step': 6692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:01.216983', 'step': 6692, 'epoch': 3} {'type': 'loss', 'content': 0.00011126509343739599, 'timestamp': '2025-09-10 02:30:01.224967', 'step': 6693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:01.257817', 'step': 6693, 'epoch': 3} {'type': 'loss', 'content': 7.454932347172871e-05, 'timestamp': '2025-09-10 02:30:01.265509', 'step': 6694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:01.296459', 'step': 6694, 'epoch': 3} {'type': 'loss', 'content': 0.00021754649060312659, 'timestamp': '2025-09-10 02:30:01.303768', 'step': 6695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:01.334621', 'step': 6695, 'epoch': 3} {'type': 'loss', 'content': 0.00014526637096423656, 'timestamp': '2025-09-10 02:30:01.358375', 'step': 6696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 13289167064320}, 'timestamp': '2025-09-10 02:30:01.396064', 'step': 6696, 'epoch': 3} {'type': 'loss', 'content': 0.0002652324619702995, 'timestamp': '2025-09-10 02:30:01.411973', 'step': 6697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:01.443004', 'step': 6697, 'epoch': 3} {'type': 'loss', 'content': 0.00011327861284371465, 'timestamp': '2025-09-10 02:30:01.449826', 'step': 6698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:01.481727', 'step': 6698, 'epoch': 3} {'type': 'loss', 'content': 0.00028877213480882347, 'timestamp': '2025-09-10 02:30:01.489092', 'step': 6699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:01.520717', 'step': 6699, 'epoch': 3} {'type': 'loss', 'content': 0.00022058105969335884, 'timestamp': '2025-09-10 02:30:01.546093', 'step': 6700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:01.577682', 'step': 6700, 'epoch': 3} {'type': 'loss', 'content': 0.0002654526033438742, 'timestamp': '2025-09-10 02:30:01.582188', 'step': 6701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:01.615409', 'step': 6701, 'epoch': 3} {'type': 'loss', 'content': 0.00038097609649412334, 'timestamp': '2025-09-10 02:30:01.622347', 'step': 6702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:01.653294', 'step': 6702, 'epoch': 3} {'type': 'loss', 'content': 0.00023465848062187433, 'timestamp': '2025-09-10 02:30:01.660185', 'step': 6703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:01.692737', 'step': 6703, 'epoch': 3} {'type': 'loss', 'content': 0.00016869421233423054, 'timestamp': '2025-09-10 02:30:01.723948', 'step': 6704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:01.755326', 'step': 6704, 'epoch': 3} {'type': 'loss', 'content': 0.0002982628939207643, 'timestamp': '2025-09-10 02:30:01.760029', 'step': 6705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:30:01.796033', 'step': 6705, 'epoch': 3} {'type': 'loss', 'content': 0.00042490853229537606, 'timestamp': '2025-09-10 02:30:01.808562', 'step': 6706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:01.842911', 'step': 6706, 'epoch': 3} {'type': 'loss', 'content': 0.00036351257585920393, 'timestamp': '2025-09-10 02:30:01.847262', 'step': 6707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:01.878290', 'step': 6707, 'epoch': 3} {'type': 'loss', 'content': 0.00016362879250664264, 'timestamp': '2025-09-10 02:30:01.906042', 'step': 6708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:01.937236', 'step': 6708, 'epoch': 3} {'type': 'loss', 'content': 0.00014777052274439484, 'timestamp': '2025-09-10 02:30:01.942279', 'step': 6709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:01.973464', 'step': 6709, 'epoch': 3} {'type': 'loss', 'content': 0.00013149731967132539, 'timestamp': '2025-09-10 02:30:01.979403', 'step': 6710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:02.020841', 'step': 6710, 'epoch': 3} {'type': 'loss', 'content': 9.87174644251354e-05, 'timestamp': '2025-09-10 02:30:02.028460', 'step': 6711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:02.059923', 'step': 6711, 'epoch': 3} {'type': 'loss', 'content': 9.164093353319913e-05, 'timestamp': '2025-09-10 02:30:02.097837', 'step': 6712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:02.163435', 'step': 6712, 'epoch': 3} {'type': 'loss', 'content': 5.91975731367711e-05, 'timestamp': '2025-09-10 02:30:02.171839', 'step': 6713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:02.215126', 'step': 6713, 'epoch': 3} {'type': 'loss', 'content': 0.00025522714713588357, 'timestamp': '2025-09-10 02:30:02.227085', 'step': 6714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:30:02.266867', 'step': 6714, 'epoch': 3} {'type': 'loss', 'content': 0.0016006106743589044, 'timestamp': '2025-09-10 02:30:02.279452', 'step': 6715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:02.321903', 'step': 6715, 'epoch': 3} {'type': 'loss', 'content': 5.307100946083665e-05, 'timestamp': '2025-09-10 02:30:02.346711', 'step': 6716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:02.381968', 'step': 6716, 'epoch': 3} {'type': 'loss', 'content': 9.852695802692324e-05, 'timestamp': '2025-09-10 02:30:02.384511', 'step': 6717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:02.418468', 'step': 6717, 'epoch': 3} {'type': 'loss', 'content': 0.0003025185433216393, 'timestamp': '2025-09-10 02:30:02.425355', 'step': 6718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:30:02.456597', 'step': 6718, 'epoch': 3} {'type': 'loss', 'content': 0.00015510991215705872, 'timestamp': '2025-09-10 02:30:02.469113', 'step': 6719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:02.500955', 'step': 6719, 'epoch': 3} {'type': 'loss', 'content': 0.0001259516429854557, 'timestamp': '2025-09-10 02:30:02.528547', 'step': 6720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:02.559964', 'step': 6720, 'epoch': 3} {'type': 'loss', 'content': 0.0008776256581768394, 'timestamp': '2025-09-10 02:30:02.562312', 'step': 6721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:02.594792', 'step': 6721, 'epoch': 3} {'type': 'loss', 'content': 7.6968630310148e-05, 'timestamp': '2025-09-10 02:30:02.605651', 'step': 6722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:02.638245', 'step': 6722, 'epoch': 3} {'type': 'loss', 'content': 0.000490417645778507, 'timestamp': '2025-09-10 02:30:02.648085', 'step': 6723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:02.679494', 'step': 6723, 'epoch': 3} {'type': 'loss', 'content': 0.0023274854756891727, 'timestamp': '2025-09-10 02:30:02.708135', 'step': 6724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:02.738780', 'step': 6724, 'epoch': 3} {'type': 'loss', 'content': 7.722565351286903e-05, 'timestamp': '2025-09-10 02:30:02.741949', 'step': 6725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:02.775968', 'step': 6725, 'epoch': 3} {'type': 'loss', 'content': 0.0001424977817805484, 'timestamp': '2025-09-10 02:30:02.782594', 'step': 6726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:02.814355', 'step': 6726, 'epoch': 3} {'type': 'loss', 'content': 0.0002022543194470927, 'timestamp': '2025-09-10 02:30:02.820951', 'step': 6727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:02.852914', 'step': 6727, 'epoch': 3} {'type': 'loss', 'content': 0.0003710582968778908, 'timestamp': '2025-09-10 02:30:02.876861', 'step': 6728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:02.909251', 'step': 6728, 'epoch': 3} {'type': 'loss', 'content': 0.00013393805420491844, 'timestamp': '2025-09-10 02:30:02.911317', 'step': 6729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:02.942365', 'step': 6729, 'epoch': 3} {'type': 'loss', 'content': 0.00025334549718536437, 'timestamp': '2025-09-10 02:30:02.949941', 'step': 6730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:02.981314', 'step': 6730, 'epoch': 3} {'type': 'loss', 'content': 0.0015375103102996945, 'timestamp': '2025-09-10 02:30:02.985634', 'step': 6731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:03.016998', 'step': 6731, 'epoch': 3} {'type': 'loss', 'content': 0.00120832200627774, 'timestamp': '2025-09-10 02:30:03.045410', 'step': 6732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:03.078789', 'step': 6732, 'epoch': 3} {'type': 'loss', 'content': 0.0019324652384966612, 'timestamp': '2025-09-10 02:30:03.081264', 'step': 6733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:03.112557', 'step': 6733, 'epoch': 3} {'type': 'loss', 'content': 0.00045514092198573053, 'timestamp': '2025-09-10 02:30:03.115146', 'step': 6734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:03.146534', 'step': 6734, 'epoch': 3} {'type': 'loss', 'content': 0.0002553352096583694, 'timestamp': '2025-09-10 02:30:03.157281', 'step': 6735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:30:03.192725', 'step': 6735, 'epoch': 3} {'type': 'loss', 'content': 0.00010565890261204913, 'timestamp': '2025-09-10 02:30:03.227326', 'step': 6736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:30:03.262594', 'step': 6736, 'epoch': 3} {'type': 'loss', 'content': 0.00019748820341192186, 'timestamp': '2025-09-10 02:30:03.275653', 'step': 6737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:03.308506', 'step': 6737, 'epoch': 3} {'type': 'loss', 'content': 0.00015901295410003513, 'timestamp': '2025-09-10 02:30:03.315595', 'step': 6738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:30:03.356029', 'step': 6738, 'epoch': 3} {'type': 'loss', 'content': 0.00040654095937497914, 'timestamp': '2025-09-10 02:30:03.369737', 'step': 6739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:03.401756', 'step': 6739, 'epoch': 3} {'type': 'loss', 'content': 7.327982893912122e-05, 'timestamp': '2025-09-10 02:30:03.430063', 'step': 6740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:03.465677', 'step': 6740, 'epoch': 3} {'type': 'loss', 'content': 0.001900206902064383, 'timestamp': '2025-09-10 02:30:03.472042', 'step': 6741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:03.509158', 'step': 6741, 'epoch': 3} {'type': 'loss', 'content': 0.00012812459317501634, 'timestamp': '2025-09-10 02:30:03.516098', 'step': 6742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:03.547366', 'step': 6742, 'epoch': 3} {'type': 'loss', 'content': 0.00014215106784831733, 'timestamp': '2025-09-10 02:30:03.551339', 'step': 6743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:03.583264', 'step': 6743, 'epoch': 3} {'type': 'loss', 'content': 0.0015154675347730517, 'timestamp': '2025-09-10 02:30:03.608006', 'step': 6744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:03.639794', 'step': 6744, 'epoch': 3} {'type': 'loss', 'content': 0.00048416477511636913, 'timestamp': '2025-09-10 02:30:03.645121', 'step': 6745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:03.680280', 'step': 6745, 'epoch': 3} {'type': 'loss', 'content': 0.0016691208584234118, 'timestamp': '2025-09-10 02:30:03.693642', 'step': 6746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:03.726243', 'step': 6746, 'epoch': 3} {'type': 'loss', 'content': 0.0012729717418551445, 'timestamp': '2025-09-10 02:30:03.733692', 'step': 6747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:30:03.766399', 'step': 6747, 'epoch': 3} {'type': 'loss', 'content': 6.610819400520995e-05, 'timestamp': '2025-09-10 02:30:03.799709', 'step': 6748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:03.831631', 'step': 6748, 'epoch': 3} {'type': 'loss', 'content': 0.0004302055749576539, 'timestamp': '2025-09-10 02:30:03.836191', 'step': 6749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 15662185694400}, 'timestamp': '2025-09-10 02:30:03.882183', 'step': 6749, 'epoch': 3} {'type': 'loss', 'content': 0.00018864336016122252, 'timestamp': '2025-09-10 02:30:03.901351', 'step': 6750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:03.933412', 'step': 6750, 'epoch': 3} {'type': 'loss', 'content': 0.00017309685063082725, 'timestamp': '2025-09-10 02:30:03.940633', 'step': 6751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:03.974347', 'step': 6751, 'epoch': 3} {'type': 'loss', 'content': 6.622447835979983e-05, 'timestamp': '2025-09-10 02:30:04.008600', 'step': 6752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:04.039796', 'step': 6752, 'epoch': 3} {'type': 'loss', 'content': 0.0003488397051114589, 'timestamp': '2025-09-10 02:30:04.044117', 'step': 6753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:04.076402', 'step': 6753, 'epoch': 3} {'type': 'loss', 'content': 0.00013236506492830813, 'timestamp': '2025-09-10 02:30:04.083647', 'step': 6754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:04.115194', 'step': 6754, 'epoch': 3} {'type': 'loss', 'content': 0.00018496920529287308, 'timestamp': '2025-09-10 02:30:04.122414', 'step': 6755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:30:04.157677', 'step': 6755, 'epoch': 3} {'type': 'loss', 'content': 0.00016140654042828828, 'timestamp': '2025-09-10 02:30:04.192428', 'step': 6756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:04.224380', 'step': 6756, 'epoch': 3} {'type': 'loss', 'content': 6.932354153832421e-05, 'timestamp': '2025-09-10 02:30:04.228472', 'step': 6757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:30:04.268473', 'step': 6757, 'epoch': 3} {'type': 'loss', 'content': 0.0006969981477595866, 'timestamp': '2025-09-10 02:30:04.284320', 'step': 6758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:04.320015', 'step': 6758, 'epoch': 3} {'type': 'loss', 'content': 6.715174822602421e-05, 'timestamp': '2025-09-10 02:30:04.333442', 'step': 6759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:04.366619', 'step': 6759, 'epoch': 3} {'type': 'loss', 'content': 0.00027297786436975, 'timestamp': '2025-09-10 02:30:04.392481', 'step': 6760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:04.422941', 'step': 6760, 'epoch': 3} {'type': 'loss', 'content': 4.975103001925163e-05, 'timestamp': '2025-09-10 02:30:04.425156', 'step': 6761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:04.456491', 'step': 6761, 'epoch': 3} {'type': 'loss', 'content': 0.00024496050900779665, 'timestamp': '2025-09-10 02:30:04.466667', 'step': 6762, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:30:15.292289', 'step': 6762, 'epoch': 3} {'type': 'pplx', 'content': 23855258.0813289, 'timestamp': '2025-09-10 02:30:15.295377', 'step': 6762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:15.326564', 'step': 6762, 'epoch': 3} {'type': 'loss', 'content': 0.00013265803863760084, 'timestamp': '2025-09-10 02:30:15.329741', 'step': 6763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:15.364987', 'step': 6763, 'epoch': 3} {'type': 'loss', 'content': 0.0006455808761529624, 'timestamp': '2025-09-10 02:30:15.393246', 'step': 6764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:15.425460', 'step': 6764, 'epoch': 3} {'type': 'loss', 'content': 0.0010767403291538358, 'timestamp': '2025-09-10 02:30:15.438173', 'step': 6765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:15.469805', 'step': 6765, 'epoch': 3} {'type': 'loss', 'content': 0.00016831964603625238, 'timestamp': '2025-09-10 02:30:15.477267', 'step': 6766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:15.540508', 'step': 6766, 'epoch': 3} {'type': 'loss', 'content': 7.969191210577264e-05, 'timestamp': '2025-09-10 02:30:15.547193', 'step': 6767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:15.602576', 'step': 6767, 'epoch': 3} {'type': 'loss', 'content': 0.008119060657918453, 'timestamp': '2025-09-10 02:30:15.634471', 'step': 6768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:15.686556', 'step': 6768, 'epoch': 3} {'type': 'loss', 'content': 0.0025774035602808, 'timestamp': '2025-09-10 02:30:15.690953', 'step': 6769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:15.732130', 'step': 6769, 'epoch': 3} {'type': 'loss', 'content': 0.0003182920045219362, 'timestamp': '2025-09-10 02:30:15.738838', 'step': 6770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:15.772239', 'step': 6770, 'epoch': 3} {'type': 'loss', 'content': 0.00016758790297899395, 'timestamp': '2025-09-10 02:30:15.784544', 'step': 6771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:15.815462', 'step': 6771, 'epoch': 3} {'type': 'loss', 'content': 0.0013579537626355886, 'timestamp': '2025-09-10 02:30:15.846516', 'step': 6772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:15.881090', 'step': 6772, 'epoch': 3} {'type': 'loss', 'content': 0.0003011829103343189, 'timestamp': '2025-09-10 02:30:15.888746', 'step': 6773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:15.922440', 'step': 6773, 'epoch': 3} {'type': 'loss', 'content': 0.0005984340095892549, 'timestamp': '2025-09-10 02:30:15.935758', 'step': 6774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:15.969843', 'step': 6774, 'epoch': 3} {'type': 'loss', 'content': 8.118032565107569e-05, 'timestamp': '2025-09-10 02:30:15.981573', 'step': 6775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:16.014106', 'step': 6775, 'epoch': 3} {'type': 'loss', 'content': 6.917355494806543e-05, 'timestamp': '2025-09-10 02:30:16.041935', 'step': 6776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:16.072768', 'step': 6776, 'epoch': 3} {'type': 'loss', 'content': 6.683034735033289e-05, 'timestamp': '2025-09-10 02:30:16.077988', 'step': 6777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:16.110417', 'step': 6777, 'epoch': 3} {'type': 'loss', 'content': 0.0003518997982610017, 'timestamp': '2025-09-10 02:30:16.117652', 'step': 6778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:16.148684', 'step': 6778, 'epoch': 3} {'type': 'loss', 'content': 5.0944421673193574e-05, 'timestamp': '2025-09-10 02:30:16.151125', 'step': 6779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:16.182224', 'step': 6779, 'epoch': 3} {'type': 'loss', 'content': 0.00011305516818538308, 'timestamp': '2025-09-10 02:30:16.210513', 'step': 6780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:16.242733', 'step': 6780, 'epoch': 3} {'type': 'loss', 'content': 0.004298292566090822, 'timestamp': '2025-09-10 02:30:16.251720', 'step': 6781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:16.282589', 'step': 6781, 'epoch': 3} {'type': 'loss', 'content': 0.00013062043581157923, 'timestamp': '2025-09-10 02:30:16.294634', 'step': 6782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:16.329331', 'step': 6782, 'epoch': 3} {'type': 'loss', 'content': 0.012010819278657436, 'timestamp': '2025-09-10 02:30:16.336729', 'step': 6783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:16.367635', 'step': 6783, 'epoch': 3} {'type': 'loss', 'content': 0.00013636215589940548, 'timestamp': '2025-09-10 02:30:16.399380', 'step': 6784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:16.437640', 'step': 6784, 'epoch': 3} {'type': 'loss', 'content': 0.00012389972107484937, 'timestamp': '2025-09-10 02:30:16.442370', 'step': 6785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:16.475988', 'step': 6785, 'epoch': 3} {'type': 'loss', 'content': 0.0001779487356543541, 'timestamp': '2025-09-10 02:30:16.483346', 'step': 6786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:16.517989', 'step': 6786, 'epoch': 3} {'type': 'loss', 'content': 0.001430910429917276, 'timestamp': '2025-09-10 02:30:16.524922', 'step': 6787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:16.558105', 'step': 6787, 'epoch': 3} {'type': 'loss', 'content': 8.068051829468459e-05, 'timestamp': '2025-09-10 02:30:16.590893', 'step': 6788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:30:16.627308', 'step': 6788, 'epoch': 3} {'type': 'loss', 'content': 0.00024358855444006622, 'timestamp': '2025-09-10 02:30:16.640280', 'step': 6789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:16.674026', 'step': 6789, 'epoch': 3} {'type': 'loss', 'content': 0.0007300904835574329, 'timestamp': '2025-09-10 02:30:16.678254', 'step': 6790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:16.716899', 'step': 6790, 'epoch': 3} {'type': 'loss', 'content': 2.7813763153972104e-05, 'timestamp': '2025-09-10 02:30:16.719438', 'step': 6791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:16.760030', 'step': 6791, 'epoch': 3} {'type': 'loss', 'content': 8.391224400838837e-05, 'timestamp': '2025-09-10 02:30:16.788260', 'step': 6792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:16.824861', 'step': 6792, 'epoch': 3} {'type': 'loss', 'content': 0.0007873232243582606, 'timestamp': '2025-09-10 02:30:16.829373', 'step': 6793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 15187581968384}, 'timestamp': '2025-09-10 02:30:16.874610', 'step': 6793, 'epoch': 3} {'type': 'loss', 'content': 8.269152749562636e-05, 'timestamp': '2025-09-10 02:30:16.892285', 'step': 6794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:16.929849', 'step': 6794, 'epoch': 3} {'type': 'loss', 'content': 0.00020102993585169315, 'timestamp': '2025-09-10 02:30:16.937347', 'step': 6795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:16.970154', 'step': 6795, 'epoch': 3} {'type': 'loss', 'content': 0.0012203119695186615, 'timestamp': '2025-09-10 02:30:16.997874', 'step': 6796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:17.029699', 'step': 6796, 'epoch': 3} {'type': 'loss', 'content': 0.010345556773245335, 'timestamp': '2025-09-10 02:30:17.039296', 'step': 6797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:17.073566', 'step': 6797, 'epoch': 3} {'type': 'loss', 'content': 0.007398456335067749, 'timestamp': '2025-09-10 02:30:17.077519', 'step': 6798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:30:17.119440', 'step': 6798, 'epoch': 3} {'type': 'loss', 'content': 0.0006762798875570297, 'timestamp': '2025-09-10 02:30:17.135062', 'step': 6799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:17.168051', 'step': 6799, 'epoch': 3} {'type': 'loss', 'content': 0.00012070146476617083, 'timestamp': '2025-09-10 02:30:17.196027', 'step': 6800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:17.230664', 'step': 6800, 'epoch': 3} {'type': 'loss', 'content': 5.54533107788302e-05, 'timestamp': '2025-09-10 02:30:17.247016', 'step': 6801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:30:17.285299', 'step': 6801, 'epoch': 3} {'type': 'loss', 'content': 6.327310256892815e-05, 'timestamp': '2025-09-10 02:30:17.299295', 'step': 6802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:17.331893', 'step': 6802, 'epoch': 3} {'type': 'loss', 'content': 0.030736997723579407, 'timestamp': '2025-09-10 02:30:17.334233', 'step': 6803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:30:17.369517', 'step': 6803, 'epoch': 3} {'type': 'loss', 'content': 0.0001166960719274357, 'timestamp': '2025-09-10 02:30:17.402857', 'step': 6804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:30:17.440516', 'step': 6804, 'epoch': 3} {'type': 'loss', 'content': 0.00021152500994503498, 'timestamp': '2025-09-10 02:30:17.455582', 'step': 6805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:17.489705', 'step': 6805, 'epoch': 3} {'type': 'loss', 'content': 0.00016026229423005134, 'timestamp': '2025-09-10 02:30:17.492235', 'step': 6806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:30:17.531358', 'step': 6806, 'epoch': 3} {'type': 'loss', 'content': 0.0014069009339436889, 'timestamp': '2025-09-10 02:30:17.546926', 'step': 6807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:17.581034', 'step': 6807, 'epoch': 3} {'type': 'loss', 'content': 0.0012990307295694947, 'timestamp': '2025-09-10 02:30:17.609309', 'step': 6808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:17.642719', 'step': 6808, 'epoch': 3} {'type': 'loss', 'content': 0.0327615961432457, 'timestamp': '2025-09-10 02:30:17.650909', 'step': 6809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:17.682485', 'step': 6809, 'epoch': 3} {'type': 'loss', 'content': 0.0003256215713918209, 'timestamp': '2025-09-10 02:30:17.689780', 'step': 6810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:17.722498', 'step': 6810, 'epoch': 3} {'type': 'loss', 'content': 0.00016400113236159086, 'timestamp': '2025-09-10 02:30:17.732387', 'step': 6811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:17.765986', 'step': 6811, 'epoch': 3} {'type': 'loss', 'content': 3.386579919606447e-05, 'timestamp': '2025-09-10 02:30:17.793719', 'step': 6812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:17.829048', 'step': 6812, 'epoch': 3} {'type': 'loss', 'content': 7.543731771875173e-05, 'timestamp': '2025-09-10 02:30:17.838282', 'step': 6813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:17.871913', 'step': 6813, 'epoch': 3} {'type': 'loss', 'content': 0.0010472764261066914, 'timestamp': '2025-09-10 02:30:17.882138', 'step': 6814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:17.915235', 'step': 6814, 'epoch': 3} {'type': 'loss', 'content': 0.00010902778012678027, 'timestamp': '2025-09-10 02:30:17.917728', 'step': 6815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:17.951419', 'step': 6815, 'epoch': 3} {'type': 'loss', 'content': 0.0002544302260503173, 'timestamp': '2025-09-10 02:30:17.976575', 'step': 6816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:18.008867', 'step': 6816, 'epoch': 3} {'type': 'loss', 'content': 0.0003031869127880782, 'timestamp': '2025-09-10 02:30:18.013772', 'step': 6817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:18.049470', 'step': 6817, 'epoch': 3} {'type': 'loss', 'content': 0.00049980339827016, 'timestamp': '2025-09-10 02:30:18.060303', 'step': 6818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:18.097031', 'step': 6818, 'epoch': 3} {'type': 'loss', 'content': 0.00019091797003056854, 'timestamp': '2025-09-10 02:30:18.110426', 'step': 6819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:18.143731', 'step': 6819, 'epoch': 3} {'type': 'loss', 'content': 0.0006083925254642963, 'timestamp': '2025-09-10 02:30:18.168937', 'step': 6820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:18.203331', 'step': 6820, 'epoch': 3} {'type': 'loss', 'content': 3.496772114885971e-05, 'timestamp': '2025-09-10 02:30:18.216022', 'step': 6821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:18.251835', 'step': 6821, 'epoch': 3} {'type': 'loss', 'content': 0.0001554272894281894, 'timestamp': '2025-09-10 02:30:18.255935', 'step': 6822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:18.291116', 'step': 6822, 'epoch': 3} {'type': 'loss', 'content': 0.0005963979056105018, 'timestamp': '2025-09-10 02:30:18.301097', 'step': 6823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:18.332866', 'step': 6823, 'epoch': 3} {'type': 'loss', 'content': 8.008737495401874e-05, 'timestamp': '2025-09-10 02:30:18.364544', 'step': 6824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:18.399060', 'step': 6824, 'epoch': 3} {'type': 'loss', 'content': 0.00042097517871297896, 'timestamp': '2025-09-10 02:30:18.411561', 'step': 6825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:18.448099', 'step': 6825, 'epoch': 3} {'type': 'loss', 'content': 6.969293463043869e-05, 'timestamp': '2025-09-10 02:30:18.457796', 'step': 6826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:18.493630', 'step': 6826, 'epoch': 3} {'type': 'loss', 'content': 0.0014862669631838799, 'timestamp': '2025-09-10 02:30:18.501335', 'step': 6827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:18.535521', 'step': 6827, 'epoch': 3} {'type': 'loss', 'content': 0.00019058329053223133, 'timestamp': '2025-09-10 02:30:18.563006', 'step': 6828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:18.601204', 'step': 6828, 'epoch': 3} {'type': 'loss', 'content': 0.0002576792612671852, 'timestamp': '2025-09-10 02:30:18.605856', 'step': 6829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:18.637920', 'step': 6829, 'epoch': 3} {'type': 'loss', 'content': 0.0004352860269136727, 'timestamp': '2025-09-10 02:30:18.645472', 'step': 6830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:18.681478', 'step': 6830, 'epoch': 3} {'type': 'loss', 'content': 3.436298720771447e-05, 'timestamp': '2025-09-10 02:30:18.688357', 'step': 6831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:18.724012', 'step': 6831, 'epoch': 3} {'type': 'loss', 'content': 0.00022188770526554435, 'timestamp': '2025-09-10 02:30:18.749357', 'step': 6832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:18.780158', 'step': 6832, 'epoch': 3} {'type': 'loss', 'content': 0.0015023789601400495, 'timestamp': '2025-09-10 02:30:18.784655', 'step': 6833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:18.820484', 'step': 6833, 'epoch': 3} {'type': 'loss', 'content': 0.0012278666254132986, 'timestamp': '2025-09-10 02:30:18.832468', 'step': 6834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:18.865085', 'step': 6834, 'epoch': 3} {'type': 'loss', 'content': 0.005471336655318737, 'timestamp': '2025-09-10 02:30:18.869075', 'step': 6835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:18.901174', 'step': 6835, 'epoch': 3} {'type': 'loss', 'content': 0.0007918172632344067, 'timestamp': '2025-09-10 02:30:18.929621', 'step': 6836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:18.960867', 'step': 6836, 'epoch': 3} {'type': 'loss', 'content': 0.00016386432980652899, 'timestamp': '2025-09-10 02:30:18.968220', 'step': 6837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:18.998902', 'step': 6837, 'epoch': 3} {'type': 'loss', 'content': 0.00040015511331148446, 'timestamp': '2025-09-10 02:30:19.005819', 'step': 6838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:19.039481', 'step': 6838, 'epoch': 3} {'type': 'loss', 'content': 0.0003809529298450798, 'timestamp': '2025-09-10 02:30:19.050226', 'step': 6839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:19.089016', 'step': 6839, 'epoch': 3} {'type': 'loss', 'content': 0.00012751105532515794, 'timestamp': '2025-09-10 02:30:19.116968', 'step': 6840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:19.154062', 'step': 6840, 'epoch': 3} {'type': 'loss', 'content': 0.0002030668401857838, 'timestamp': '2025-09-10 02:30:19.156202', 'step': 6841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:19.196680', 'step': 6841, 'epoch': 3} {'type': 'loss', 'content': 8.849247387843207e-05, 'timestamp': '2025-09-10 02:30:19.207016', 'step': 6842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:19.240281', 'step': 6842, 'epoch': 3} {'type': 'loss', 'content': 7.702614675508812e-05, 'timestamp': '2025-09-10 02:30:19.242811', 'step': 6843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:19.274075', 'step': 6843, 'epoch': 3} {'type': 'loss', 'content': 0.00011114530934719369, 'timestamp': '2025-09-10 02:30:19.299505', 'step': 6844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:19.330251', 'step': 6844, 'epoch': 3} {'type': 'loss', 'content': 0.00036023682332597673, 'timestamp': '2025-09-10 02:30:19.332826', 'step': 6845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:19.363969', 'step': 6845, 'epoch': 3} {'type': 'loss', 'content': 0.00022267237363848835, 'timestamp': '2025-09-10 02:30:19.374234', 'step': 6846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:30:19.413588', 'step': 6846, 'epoch': 3} {'type': 'loss', 'content': 0.00026117305969819427, 'timestamp': '2025-09-10 02:30:19.426145', 'step': 6847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:19.457215', 'step': 6847, 'epoch': 3} {'type': 'loss', 'content': 0.00017910859605763108, 'timestamp': '2025-09-10 02:30:19.484847', 'step': 6848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:19.518918', 'step': 6848, 'epoch': 3} {'type': 'loss', 'content': 0.00025339677813462913, 'timestamp': '2025-09-10 02:30:19.528648', 'step': 6849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:30:19.569357', 'step': 6849, 'epoch': 3} {'type': 'loss', 'content': 0.00041114582563750446, 'timestamp': '2025-09-10 02:30:19.585004', 'step': 6850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:30:19.626859', 'step': 6850, 'epoch': 3} {'type': 'loss', 'content': 0.002163324737921357, 'timestamp': '2025-09-10 02:30:19.640712', 'step': 6851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:19.677256', 'step': 6851, 'epoch': 3} {'type': 'loss', 'content': 0.00018701299268286675, 'timestamp': '2025-09-10 02:30:19.705133', 'step': 6852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:19.738947', 'step': 6852, 'epoch': 3} {'type': 'loss', 'content': 0.0005152305820956826, 'timestamp': '2025-09-10 02:30:19.751590', 'step': 6853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:19.785229', 'step': 6853, 'epoch': 3} {'type': 'loss', 'content': 0.007716981228441, 'timestamp': '2025-09-10 02:30:19.792581', 'step': 6854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:19.823461', 'step': 6854, 'epoch': 3} {'type': 'loss', 'content': 0.0002965559542644769, 'timestamp': '2025-09-10 02:30:19.826056', 'step': 6855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:19.862266', 'step': 6855, 'epoch': 3} {'type': 'loss', 'content': 0.00017139650299213827, 'timestamp': '2025-09-10 02:30:19.893207', 'step': 6856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:19.928880', 'step': 6856, 'epoch': 3} {'type': 'loss', 'content': 0.00021169218234717846, 'timestamp': '2025-09-10 02:30:19.936746', 'step': 6857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:19.976093', 'step': 6857, 'epoch': 3} {'type': 'loss', 'content': 0.00011072350753238425, 'timestamp': '2025-09-10 02:30:19.989442', 'step': 6858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:20.022347', 'step': 6858, 'epoch': 3} {'type': 'loss', 'content': 0.0019564726389944553, 'timestamp': '2025-09-10 02:30:20.029521', 'step': 6859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:30:20.062143', 'step': 6859, 'epoch': 3} {'type': 'loss', 'content': 0.00042336867772974074, 'timestamp': '2025-09-10 02:30:20.095512', 'step': 6860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:20.133531', 'step': 6860, 'epoch': 3} {'type': 'loss', 'content': 0.00017614095122553408, 'timestamp': '2025-09-10 02:30:20.138738', 'step': 6861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:20.174233', 'step': 6861, 'epoch': 3} {'type': 'loss', 'content': 0.00017980553093366325, 'timestamp': '2025-09-10 02:30:20.184889', 'step': 6862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:20.222653', 'step': 6862, 'epoch': 3} {'type': 'loss', 'content': 7.931239815661684e-05, 'timestamp': '2025-09-10 02:30:20.230060', 'step': 6863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:20.260929', 'step': 6863, 'epoch': 3} {'type': 'loss', 'content': 0.02051333151757717, 'timestamp': '2025-09-10 02:30:20.285056', 'step': 6864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:30:20.325514', 'step': 6864, 'epoch': 3} {'type': 'loss', 'content': 8.088215690804645e-05, 'timestamp': '2025-09-10 02:30:20.338810', 'step': 6865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:20.372656', 'step': 6865, 'epoch': 3} {'type': 'loss', 'content': 0.0001564481353852898, 'timestamp': '2025-09-10 02:30:20.379373', 'step': 6866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:30:20.414627', 'step': 6866, 'epoch': 3} {'type': 'loss', 'content': 0.00010649115574778989, 'timestamp': '2025-09-10 02:30:20.427159', 'step': 6867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:20.457949', 'step': 6867, 'epoch': 3} {'type': 'loss', 'content': 0.0001269724016310647, 'timestamp': '2025-09-10 02:30:20.486703', 'step': 6868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:20.524304', 'step': 6868, 'epoch': 3} {'type': 'loss', 'content': 0.0010912258876487613, 'timestamp': '2025-09-10 02:30:20.529129', 'step': 6869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:20.560997', 'step': 6869, 'epoch': 3} {'type': 'loss', 'content': 9.636014874558896e-05, 'timestamp': '2025-09-10 02:30:20.567548', 'step': 6870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:30:20.602864', 'step': 6870, 'epoch': 3} {'type': 'loss', 'content': 0.000113781621621456, 'timestamp': '2025-09-10 02:30:20.616611', 'step': 6871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:20.654164', 'step': 6871, 'epoch': 3} {'type': 'loss', 'content': 9.113257692661136e-05, 'timestamp': '2025-09-10 02:30:20.685367', 'step': 6872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:20.715516', 'step': 6872, 'epoch': 3} {'type': 'loss', 'content': 0.0006366458837874234, 'timestamp': '2025-09-10 02:30:20.720287', 'step': 6873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:20.751250', 'step': 6873, 'epoch': 3} {'type': 'loss', 'content': 0.0002216670400230214, 'timestamp': '2025-09-10 02:30:20.758358', 'step': 6874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:20.797148', 'step': 6874, 'epoch': 3} {'type': 'loss', 'content': 0.0001820830802898854, 'timestamp': '2025-09-10 02:30:20.809371', 'step': 6875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:20.848562', 'step': 6875, 'epoch': 3} {'type': 'loss', 'content': 0.005944172386080027, 'timestamp': '2025-09-10 02:30:20.876414', 'step': 6876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:20.909153', 'step': 6876, 'epoch': 3} {'type': 'loss', 'content': 5.662976036546752e-05, 'timestamp': '2025-09-10 02:30:20.911434', 'step': 6877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:30:20.951774', 'step': 6877, 'epoch': 3} {'type': 'loss', 'content': 0.00043625704711303115, 'timestamp': '2025-09-10 02:30:20.965576', 'step': 6878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:21.004372', 'step': 6878, 'epoch': 3} {'type': 'loss', 'content': 0.00010746198677225038, 'timestamp': '2025-09-10 02:30:21.012127', 'step': 6879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:21.044248', 'step': 6879, 'epoch': 3} {'type': 'loss', 'content': 0.00030602794140577316, 'timestamp': '2025-09-10 02:30:21.075223', 'step': 6880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:21.114259', 'step': 6880, 'epoch': 3} {'type': 'loss', 'content': 0.00037156790494918823, 'timestamp': '2025-09-10 02:30:21.121592', 'step': 6881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:21.152891', 'step': 6881, 'epoch': 3} {'type': 'loss', 'content': 0.00018727740098256618, 'timestamp': '2025-09-10 02:30:21.163096', 'step': 6882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:21.198073', 'step': 6882, 'epoch': 3} {'type': 'loss', 'content': 9.839085396379232e-05, 'timestamp': '2025-09-10 02:30:21.202551', 'step': 6883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:21.233647', 'step': 6883, 'epoch': 3} {'type': 'loss', 'content': 0.000886984693352133, 'timestamp': '2025-09-10 02:30:21.261268', 'step': 6884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:21.294025', 'step': 6884, 'epoch': 3} {'type': 'loss', 'content': 0.00016462511848658323, 'timestamp': '2025-09-10 02:30:21.301922', 'step': 6885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:21.333917', 'step': 6885, 'epoch': 3} {'type': 'loss', 'content': 0.0043139709159731865, 'timestamp': '2025-09-10 02:30:21.340952', 'step': 6886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:21.380516', 'step': 6886, 'epoch': 3} {'type': 'loss', 'content': 0.0008590264478698373, 'timestamp': '2025-09-10 02:30:21.387960', 'step': 6887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:21.424227', 'step': 6887, 'epoch': 3} {'type': 'loss', 'content': 0.0003677209315355867, 'timestamp': '2025-09-10 02:30:21.452110', 'step': 6888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:21.482477', 'step': 6888, 'epoch': 3} {'type': 'loss', 'content': 9.307049185736105e-05, 'timestamp': '2025-09-10 02:30:21.491069', 'step': 6889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:30:21.521828', 'step': 6889, 'epoch': 3} {'type': 'loss', 'content': 8.364223322132602e-05, 'timestamp': '2025-09-10 02:30:21.534406', 'step': 6890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:21.570779', 'step': 6890, 'epoch': 3} {'type': 'loss', 'content': 0.00047637257375754416, 'timestamp': '2025-09-10 02:30:21.574792', 'step': 6891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:21.607469', 'step': 6891, 'epoch': 3} {'type': 'loss', 'content': 0.0028176165651530027, 'timestamp': '2025-09-10 02:30:21.632477', 'step': 6892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:21.697379', 'step': 6892, 'epoch': 3} {'type': 'loss', 'content': 0.0002470030449330807, 'timestamp': '2025-09-10 02:30:21.699510', 'step': 6893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:21.735128', 'step': 6893, 'epoch': 3} {'type': 'loss', 'content': 0.0006550102843903005, 'timestamp': '2025-09-10 02:30:21.748541', 'step': 6894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:21.789661', 'step': 6894, 'epoch': 3} {'type': 'loss', 'content': 0.00011021040700143203, 'timestamp': '2025-09-10 02:30:21.796606', 'step': 6895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:21.831675', 'step': 6895, 'epoch': 3} {'type': 'loss', 'content': 0.0010865674121305346, 'timestamp': '2025-09-10 02:30:21.860184', 'step': 6896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:21.893488', 'step': 6896, 'epoch': 3} {'type': 'loss', 'content': 0.0001547595311421901, 'timestamp': '2025-09-10 02:30:21.896158', 'step': 6897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:21.926703', 'step': 6897, 'epoch': 3} {'type': 'loss', 'content': 0.00014410761650651693, 'timestamp': '2025-09-10 02:30:21.937151', 'step': 6898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:21.967455', 'step': 6898, 'epoch': 3} {'type': 'loss', 'content': 0.0005947855534031987, 'timestamp': '2025-09-10 02:30:21.971499', 'step': 6899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:22.002647', 'step': 6899, 'epoch': 3} {'type': 'loss', 'content': 0.00025538000045344234, 'timestamp': '2025-09-10 02:30:22.027939', 'step': 6900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:30:22.060817', 'step': 6900, 'epoch': 3} {'type': 'loss', 'content': 0.0005543892038986087, 'timestamp': '2025-09-10 02:30:22.073926', 'step': 6901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:22.114769', 'step': 6901, 'epoch': 3} {'type': 'loss', 'content': 0.0021151488181203604, 'timestamp': '2025-09-10 02:30:22.121566', 'step': 6902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:22.162625', 'step': 6902, 'epoch': 3} {'type': 'loss', 'content': 0.00011541605636011809, 'timestamp': '2025-09-10 02:30:22.166933', 'step': 6903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:22.197743', 'step': 6903, 'epoch': 3} {'type': 'loss', 'content': 0.0013175479834899306, 'timestamp': '2025-09-10 02:30:22.226283', 'step': 6904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:30:22.281595', 'step': 6904, 'epoch': 3} {'type': 'loss', 'content': 3.248906432418153e-05, 'timestamp': '2025-09-10 02:30:22.298306', 'step': 6905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:22.328426', 'step': 6905, 'epoch': 3} {'type': 'loss', 'content': 4.818878369405866e-05, 'timestamp': '2025-09-10 02:30:22.330887', 'step': 6906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:22.361346', 'step': 6906, 'epoch': 3} {'type': 'loss', 'content': 0.0005144443712197244, 'timestamp': '2025-09-10 02:30:22.368298', 'step': 6907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:30:22.400777', 'step': 6907, 'epoch': 3} {'type': 'loss', 'content': 4.7616198571631685e-05, 'timestamp': '2025-09-10 02:30:22.434318', 'step': 6908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:22.469807', 'step': 6908, 'epoch': 3} {'type': 'loss', 'content': 0.00013233958452474326, 'timestamp': '2025-09-10 02:30:22.474890', 'step': 6909, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:30:32.571318', 'step': 6909, 'epoch': 3} {'type': 'pplx', 'content': 24853930.15520345, 'timestamp': '2025-09-10 02:30:32.574149', 'step': 6909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:32.608153', 'step': 6909, 'epoch': 3} {'type': 'loss', 'content': 9.375996887683868e-05, 'timestamp': '2025-09-10 02:30:32.616912', 'step': 6910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:32.650941', 'step': 6910, 'epoch': 3} {'type': 'loss', 'content': 0.0002628415822982788, 'timestamp': '2025-09-10 02:30:32.654724', 'step': 6911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:32.687238', 'step': 6911, 'epoch': 3} {'type': 'loss', 'content': 0.0002938093966804445, 'timestamp': '2025-09-10 02:30:32.711218', 'step': 6912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:32.742241', 'step': 6912, 'epoch': 3} {'type': 'loss', 'content': 0.0008156453259289265, 'timestamp': '2025-09-10 02:30:32.744693', 'step': 6913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:32.775489', 'step': 6913, 'epoch': 3} {'type': 'loss', 'content': 0.00015855650417506695, 'timestamp': '2025-09-10 02:30:32.779710', 'step': 6914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:32.811619', 'step': 6914, 'epoch': 3} {'type': 'loss', 'content': 6.417724216589704e-05, 'timestamp': '2025-09-10 02:30:32.815751', 'step': 6915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:32.847821', 'step': 6915, 'epoch': 3} {'type': 'loss', 'content': 0.011772527359426022, 'timestamp': '2025-09-10 02:30:32.875766', 'step': 6916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:32.908515', 'step': 6916, 'epoch': 3} {'type': 'loss', 'content': 0.0008328754338435829, 'timestamp': '2025-09-10 02:30:32.912764', 'step': 6917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:32.944174', 'step': 6917, 'epoch': 3} {'type': 'loss', 'content': 0.00012226430408190936, 'timestamp': '2025-09-10 02:30:32.948156', 'step': 6918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:32.979169', 'step': 6918, 'epoch': 3} {'type': 'loss', 'content': 0.00023380214406643063, 'timestamp': '2025-09-10 02:30:32.985922', 'step': 6919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:33.017286', 'step': 6919, 'epoch': 3} {'type': 'loss', 'content': 0.0002332628209842369, 'timestamp': '2025-09-10 02:30:33.049690', 'step': 6920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:33.081821', 'step': 6920, 'epoch': 3} {'type': 'loss', 'content': 0.000248458469286561, 'timestamp': '2025-09-10 02:30:33.086063', 'step': 6921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:33.117484', 'step': 6921, 'epoch': 3} {'type': 'loss', 'content': 0.00023750065884087235, 'timestamp': '2025-09-10 02:30:33.127207', 'step': 6922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:33.158861', 'step': 6922, 'epoch': 3} {'type': 'loss', 'content': 0.00011210433149244636, 'timestamp': '2025-09-10 02:30:33.165640', 'step': 6923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:33.198408', 'step': 6923, 'epoch': 3} {'type': 'loss', 'content': 0.00017960583500098437, 'timestamp': '2025-09-10 02:30:33.226801', 'step': 6924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:33.259757', 'step': 6924, 'epoch': 3} {'type': 'loss', 'content': 0.00010823294724104926, 'timestamp': '2025-09-10 02:30:33.264770', 'step': 6925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:33.298052', 'step': 6925, 'epoch': 3} {'type': 'loss', 'content': 0.001127683324739337, 'timestamp': '2025-09-10 02:30:33.304551', 'step': 6926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:33.347085', 'step': 6926, 'epoch': 3} {'type': 'loss', 'content': 0.0002121505531249568, 'timestamp': '2025-09-10 02:30:33.353818', 'step': 6927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:33.387708', 'step': 6927, 'epoch': 3} {'type': 'loss', 'content': 0.00015519419685006142, 'timestamp': '2025-09-10 02:30:33.415510', 'step': 6928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:33.458245', 'step': 6928, 'epoch': 3} {'type': 'loss', 'content': 0.0001299724681302905, 'timestamp': '2025-09-10 02:30:33.466105', 'step': 6929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:33.500059', 'step': 6929, 'epoch': 3} {'type': 'loss', 'content': 0.0001691354700597003, 'timestamp': '2025-09-10 02:30:33.507491', 'step': 6930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:33.539099', 'step': 6930, 'epoch': 3} {'type': 'loss', 'content': 8.918951789382845e-05, 'timestamp': '2025-09-10 02:30:33.546449', 'step': 6931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:33.579426', 'step': 6931, 'epoch': 3} {'type': 'loss', 'content': 0.00011090342741226777, 'timestamp': '2025-09-10 02:30:33.604561', 'step': 6932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:33.636469', 'step': 6932, 'epoch': 3} {'type': 'loss', 'content': 0.00038635358214378357, 'timestamp': '2025-09-10 02:30:33.640763', 'step': 6933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:33.675256', 'step': 6933, 'epoch': 3} {'type': 'loss', 'content': 0.0012322509428486228, 'timestamp': '2025-09-10 02:30:33.686697', 'step': 6934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:33.719036', 'step': 6934, 'epoch': 3} {'type': 'loss', 'content': 0.0012386480811983347, 'timestamp': '2025-09-10 02:30:33.726134', 'step': 6935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:33.758568', 'step': 6935, 'epoch': 3} {'type': 'loss', 'content': 3.629237107816152e-05, 'timestamp': '2025-09-10 02:30:33.786432', 'step': 6936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:33.819712', 'step': 6936, 'epoch': 3} {'type': 'loss', 'content': 7.105556142050773e-05, 'timestamp': '2025-09-10 02:30:33.823893', 'step': 6937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:33.859795', 'step': 6937, 'epoch': 3} {'type': 'loss', 'content': 9.639743802836165e-05, 'timestamp': '2025-09-10 02:30:33.869595', 'step': 6938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:33.904854', 'step': 6938, 'epoch': 3} {'type': 'loss', 'content': 6.235136243049055e-05, 'timestamp': '2025-09-10 02:30:33.914782', 'step': 6939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:33.947402', 'step': 6939, 'epoch': 3} {'type': 'loss', 'content': 0.0002960147976409644, 'timestamp': '2025-09-10 02:30:33.979908', 'step': 6940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:34.012860', 'step': 6940, 'epoch': 3} {'type': 'loss', 'content': 0.007601078599691391, 'timestamp': '2025-09-10 02:30:34.024405', 'step': 6941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:34.057014', 'step': 6941, 'epoch': 3} {'type': 'loss', 'content': 3.58233337465208e-05, 'timestamp': '2025-09-10 02:30:34.063787', 'step': 6942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:34.100482', 'step': 6942, 'epoch': 3} {'type': 'loss', 'content': 0.0015243064844980836, 'timestamp': '2025-09-10 02:30:34.103494', 'step': 6943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:34.138373', 'step': 6943, 'epoch': 3} {'type': 'loss', 'content': 0.00024156909785233438, 'timestamp': '2025-09-10 02:30:34.165987', 'step': 6944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:30:34.216927', 'step': 6944, 'epoch': 3} {'type': 'loss', 'content': 0.00428308779373765, 'timestamp': '2025-09-10 02:30:34.233895', 'step': 6945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:34.272835', 'step': 6945, 'epoch': 3} {'type': 'loss', 'content': 0.029404859989881516, 'timestamp': '2025-09-10 02:30:34.282825', 'step': 6946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:34.329763', 'step': 6946, 'epoch': 3} {'type': 'loss', 'content': 0.00013546801346819848, 'timestamp': '2025-09-10 02:30:34.337397', 'step': 6947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:34.375921', 'step': 6947, 'epoch': 3} {'type': 'loss', 'content': 3.598109833546914e-05, 'timestamp': '2025-09-10 02:30:34.404145', 'step': 6948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:34.439246', 'step': 6948, 'epoch': 3} {'type': 'loss', 'content': 0.00020628688798751682, 'timestamp': '2025-09-10 02:30:34.442744', 'step': 6949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:30:34.487136', 'step': 6949, 'epoch': 3} {'type': 'loss', 'content': 0.00010723454033723101, 'timestamp': '2025-09-10 02:30:34.503012', 'step': 6950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:34.539197', 'step': 6950, 'epoch': 3} {'type': 'loss', 'content': 0.00011623270984273404, 'timestamp': '2025-09-10 02:30:34.547880', 'step': 6951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:34.586654', 'step': 6951, 'epoch': 3} {'type': 'loss', 'content': 0.0001242592406924814, 'timestamp': '2025-09-10 02:30:34.611887', 'step': 6952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:34.643970', 'step': 6952, 'epoch': 3} {'type': 'loss', 'content': 0.0004370961687527597, 'timestamp': '2025-09-10 02:30:34.649214', 'step': 6953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:34.690287', 'step': 6953, 'epoch': 3} {'type': 'loss', 'content': 0.0002730258565861732, 'timestamp': '2025-09-10 02:30:34.700991', 'step': 6954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:34.736851', 'step': 6954, 'epoch': 3} {'type': 'loss', 'content': 0.00011180860747117549, 'timestamp': '2025-09-10 02:30:34.744122', 'step': 6955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:34.783004', 'step': 6955, 'epoch': 3} {'type': 'loss', 'content': 0.0004824143834412098, 'timestamp': '2025-09-10 02:30:34.817426', 'step': 6956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:34.849646', 'step': 6956, 'epoch': 3} {'type': 'loss', 'content': 0.00022752817312721163, 'timestamp': '2025-09-10 02:30:34.851825', 'step': 6957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:34.883349', 'step': 6957, 'epoch': 3} {'type': 'loss', 'content': 0.0062463474459946156, 'timestamp': '2025-09-10 02:30:34.890202', 'step': 6958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:34.922061', 'step': 6958, 'epoch': 3} {'type': 'loss', 'content': 5.5043336033122614e-05, 'timestamp': '2025-09-10 02:30:34.928820', 'step': 6959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 14712978242368}, 'timestamp': '2025-09-10 02:30:34.972076', 'step': 6959, 'epoch': 3} {'type': 'loss', 'content': 0.0005719130276702344, 'timestamp': '2025-09-10 02:30:35.010467', 'step': 6960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:35.043747', 'step': 6960, 'epoch': 3} {'type': 'loss', 'content': 0.00044621675624512136, 'timestamp': '2025-09-10 02:30:35.050415', 'step': 6961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:35.084469', 'step': 6961, 'epoch': 3} {'type': 'loss', 'content': 0.00034819470602087677, 'timestamp': '2025-09-10 02:30:35.094215', 'step': 6962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:35.126745', 'step': 6962, 'epoch': 3} {'type': 'loss', 'content': 0.00016455540026072413, 'timestamp': '2025-09-10 02:30:35.133099', 'step': 6963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:35.164791', 'step': 6963, 'epoch': 3} {'type': 'loss', 'content': 5.0337421271251515e-05, 'timestamp': '2025-09-10 02:30:35.188773', 'step': 6964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:35.221492', 'step': 6964, 'epoch': 3} {'type': 'loss', 'content': 0.00010189624299528077, 'timestamp': '2025-09-10 02:30:35.226632', 'step': 6965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 12339959612288}, 'timestamp': '2025-09-10 02:30:35.264821', 'step': 6965, 'epoch': 3} {'type': 'loss', 'content': 3.955454667448066e-05, 'timestamp': '2025-09-10 02:30:35.280698', 'step': 6966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:35.314438', 'step': 6966, 'epoch': 3} {'type': 'loss', 'content': 0.0009393363143317401, 'timestamp': '2025-09-10 02:30:35.324855', 'step': 6967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:35.356816', 'step': 6967, 'epoch': 3} {'type': 'loss', 'content': 3.750239193323068e-05, 'timestamp': '2025-09-10 02:30:35.384330', 'step': 6968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:35.418787', 'step': 6968, 'epoch': 3} {'type': 'loss', 'content': 7.956552872201428e-05, 'timestamp': '2025-09-10 02:30:35.423625', 'step': 6969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:35.459261', 'step': 6969, 'epoch': 3} {'type': 'loss', 'content': 0.0006180386990308762, 'timestamp': '2025-09-10 02:30:35.466933', 'step': 6970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:30:35.502471', 'step': 6970, 'epoch': 3} {'type': 'loss', 'content': 0.00042301107896491885, 'timestamp': '2025-09-10 02:30:35.516228', 'step': 6971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:35.547668', 'step': 6971, 'epoch': 3} {'type': 'loss', 'content': 0.00011797425395343453, 'timestamp': '2025-09-10 02:30:35.575234', 'step': 6972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:35.607400', 'step': 6972, 'epoch': 3} {'type': 'loss', 'content': 1.7395021131960675e-05, 'timestamp': '2025-09-10 02:30:35.612305', 'step': 6973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:35.643800', 'step': 6973, 'epoch': 3} {'type': 'loss', 'content': 4.854817962041125e-05, 'timestamp': '2025-09-10 02:30:35.650850', 'step': 6974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:35.681964', 'step': 6974, 'epoch': 3} {'type': 'loss', 'content': 0.0004031723365187645, 'timestamp': '2025-09-10 02:30:35.692077', 'step': 6975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:35.725419', 'step': 6975, 'epoch': 3} {'type': 'loss', 'content': 0.026183495298027992, 'timestamp': '2025-09-10 02:30:35.749986', 'step': 6976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:30:35.787096', 'step': 6976, 'epoch': 3} {'type': 'loss', 'content': 1.9930823327740654e-05, 'timestamp': '2025-09-10 02:30:35.802258', 'step': 6977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:35.835958', 'step': 6977, 'epoch': 3} {'type': 'loss', 'content': 3.449880387051962e-05, 'timestamp': '2025-09-10 02:30:35.842859', 'step': 6978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:35.875898', 'step': 6978, 'epoch': 3} {'type': 'loss', 'content': 7.551814633188769e-05, 'timestamp': '2025-09-10 02:30:35.886516', 'step': 6979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:35.920589', 'step': 6979, 'epoch': 3} {'type': 'loss', 'content': 0.00020103261340409517, 'timestamp': '2025-09-10 02:30:35.954756', 'step': 6980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:35.987128', 'step': 6980, 'epoch': 3} {'type': 'loss', 'content': 0.0001782312901923433, 'timestamp': '2025-09-10 02:30:35.996757', 'step': 6981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:36.032245', 'step': 6981, 'epoch': 3} {'type': 'loss', 'content': 5.386146222008392e-05, 'timestamp': '2025-09-10 02:30:36.039500', 'step': 6982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:36.071475', 'step': 6982, 'epoch': 3} {'type': 'loss', 'content': 0.014369412325322628, 'timestamp': '2025-09-10 02:30:36.083000', 'step': 6983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:36.114751', 'step': 6983, 'epoch': 3} {'type': 'loss', 'content': 0.00018798027304001153, 'timestamp': '2025-09-10 02:30:36.142496', 'step': 6984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:36.174099', 'step': 6984, 'epoch': 3} {'type': 'loss', 'content': 0.00023625533503945917, 'timestamp': '2025-09-10 02:30:36.186822', 'step': 6985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:30:36.218572', 'step': 6985, 'epoch': 3} {'type': 'loss', 'content': 8.370379509869963e-05, 'timestamp': '2025-09-10 02:30:36.231242', 'step': 6986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:36.262615', 'step': 6986, 'epoch': 3} {'type': 'loss', 'content': 0.0004739656869787723, 'timestamp': '2025-09-10 02:30:36.269571', 'step': 6987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:36.300097', 'step': 6987, 'epoch': 3} {'type': 'loss', 'content': 1.8654181985766627e-05, 'timestamp': '2025-09-10 02:30:36.323594', 'step': 6988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:36.353988', 'step': 6988, 'epoch': 3} {'type': 'loss', 'content': 7.558034121757373e-05, 'timestamp': '2025-09-10 02:30:36.356105', 'step': 6989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:36.387777', 'step': 6989, 'epoch': 3} {'type': 'loss', 'content': 0.0002668748202268034, 'timestamp': '2025-09-10 02:30:36.394720', 'step': 6990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:36.425215', 'step': 6990, 'epoch': 3} {'type': 'loss', 'content': 0.00021226401440799236, 'timestamp': '2025-09-10 02:30:36.432234', 'step': 6991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:36.463874', 'step': 6991, 'epoch': 3} {'type': 'loss', 'content': 0.00016221609257627279, 'timestamp': '2025-09-10 02:30:36.492197', 'step': 6992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:36.522883', 'step': 6992, 'epoch': 3} {'type': 'loss', 'content': 8.354503370355815e-05, 'timestamp': '2025-09-10 02:30:36.528161', 'step': 6993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:36.559418', 'step': 6993, 'epoch': 3} {'type': 'loss', 'content': 2.1902184016653337e-05, 'timestamp': '2025-09-10 02:30:36.569482', 'step': 6994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:36.602148', 'step': 6994, 'epoch': 3} {'type': 'loss', 'content': 0.00011977553367614746, 'timestamp': '2025-09-10 02:30:36.608742', 'step': 6995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:30:36.645587', 'step': 6995, 'epoch': 3} {'type': 'loss', 'content': 0.0002517557586543262, 'timestamp': '2025-09-10 02:30:36.680558', 'step': 6996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:30:36.714025', 'step': 6996, 'epoch': 3} {'type': 'loss', 'content': 9.101787145482376e-05, 'timestamp': '2025-09-10 02:30:36.727174', 'step': 6997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:36.758526', 'step': 6997, 'epoch': 3} {'type': 'loss', 'content': 0.0001227896282216534, 'timestamp': '2025-09-10 02:30:36.762573', 'step': 6998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 10916148434240}, 'timestamp': '2025-09-10 02:30:36.797864', 'step': 6998, 'epoch': 3} {'type': 'loss', 'content': 7.544071559095755e-05, 'timestamp': '2025-09-10 02:30:36.811692', 'step': 6999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:36.844402', 'step': 6999, 'epoch': 3} {'type': 'loss', 'content': 0.003692924277856946, 'timestamp': '2025-09-10 02:30:36.872282', 'step': 7000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 7000', 'timestamp': '2025-09-10 02:30:42.318498', 'step': 7000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:42.362451', 'step': 7000, 'epoch': 3} {'type': 'loss', 'content': 7.54517168388702e-05, 'timestamp': '2025-09-10 02:30:42.366545', 'step': 7001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:42.398762', 'step': 7001, 'epoch': 3} {'type': 'loss', 'content': 7.800796447554603e-05, 'timestamp': '2025-09-10 02:30:42.402311', 'step': 7002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:42.434025', 'step': 7002, 'epoch': 3} {'type': 'loss', 'content': 0.029476981610059738, 'timestamp': '2025-09-10 02:30:42.440768', 'step': 7003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:42.472458', 'step': 7003, 'epoch': 3} {'type': 'loss', 'content': 0.0001302637974731624, 'timestamp': '2025-09-10 02:30:42.496887', 'step': 7004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:30:42.530135', 'step': 7004, 'epoch': 3} {'type': 'loss', 'content': 0.011870044283568859, 'timestamp': '2025-09-10 02:30:42.540086', 'step': 7005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:42.573458', 'step': 7005, 'epoch': 3} {'type': 'loss', 'content': 0.00021210841077845544, 'timestamp': '2025-09-10 02:30:42.580218', 'step': 7006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:42.612231', 'step': 7006, 'epoch': 3} {'type': 'loss', 'content': 5.902814882574603e-05, 'timestamp': '2025-09-10 02:30:42.619093', 'step': 7007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:42.652006', 'step': 7007, 'epoch': 3} {'type': 'loss', 'content': 0.0002512595965526998, 'timestamp': '2025-09-10 02:30:42.682861', 'step': 7008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:42.718324', 'step': 7008, 'epoch': 3} {'type': 'loss', 'content': 0.0005681976908817887, 'timestamp': '2025-09-10 02:30:42.726159', 'step': 7009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:42.761462', 'step': 7009, 'epoch': 3} {'type': 'loss', 'content': 0.0010124502005055547, 'timestamp': '2025-09-10 02:30:42.766074', 'step': 7010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:42.797001', 'step': 7010, 'epoch': 3} {'type': 'loss', 'content': 0.0010337395360693336, 'timestamp': '2025-09-10 02:30:42.799307', 'step': 7011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:42.831691', 'step': 7011, 'epoch': 3} {'type': 'loss', 'content': 0.000359332247171551, 'timestamp': '2025-09-10 02:30:42.860163', 'step': 7012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:42.892069', 'step': 7012, 'epoch': 3} {'type': 'loss', 'content': 0.0001721430744510144, 'timestamp': '2025-09-10 02:30:42.896594', 'step': 7013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:42.928749', 'step': 7013, 'epoch': 3} {'type': 'loss', 'content': 0.0001188502719742246, 'timestamp': '2025-09-10 02:30:42.939898', 'step': 7014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:42.975516', 'step': 7014, 'epoch': 3} {'type': 'loss', 'content': 0.00013335456606000662, 'timestamp': '2025-09-10 02:30:42.979666', 'step': 7015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:43.012243', 'step': 7015, 'epoch': 3} {'type': 'loss', 'content': 7.617595110787079e-05, 'timestamp': '2025-09-10 02:30:43.036918', 'step': 7016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:43.071708', 'step': 7016, 'epoch': 3} {'type': 'loss', 'content': 0.0004477399925235659, 'timestamp': '2025-09-10 02:30:43.080414', 'step': 7017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:43.120632', 'step': 7017, 'epoch': 3} {'type': 'loss', 'content': 8.525385055691004e-05, 'timestamp': '2025-09-10 02:30:43.134005', 'step': 7018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:43.167311', 'step': 7018, 'epoch': 3} {'type': 'loss', 'content': 0.0006595923332497478, 'timestamp': '2025-09-10 02:30:43.174667', 'step': 7019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 11865355886272}, 'timestamp': '2025-09-10 02:30:43.214268', 'step': 7019, 'epoch': 3} {'type': 'loss', 'content': 4.695385359809734e-05, 'timestamp': '2025-09-10 02:30:43.250768', 'step': 7020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:43.281538', 'step': 7020, 'epoch': 3} {'type': 'loss', 'content': 0.00017301096522714943, 'timestamp': '2025-09-10 02:30:43.299985', 'step': 7021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 14238374516352}, 'timestamp': '2025-09-10 02:30:43.357493', 'step': 7021, 'epoch': 3} {'type': 'loss', 'content': 0.0001524223480373621, 'timestamp': '2025-09-10 02:30:43.374840', 'step': 7022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:43.405945', 'step': 7022, 'epoch': 3} {'type': 'loss', 'content': 0.00013412600674200803, 'timestamp': '2025-09-10 02:30:43.413011', 'step': 7023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:43.448479', 'step': 7023, 'epoch': 3} {'type': 'loss', 'content': 0.00017144733283203095, 'timestamp': '2025-09-10 02:30:43.473773', 'step': 7024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:43.510354', 'step': 7024, 'epoch': 3} {'type': 'loss', 'content': 0.00034450864768587053, 'timestamp': '2025-09-10 02:30:43.512714', 'step': 7025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:43.544358', 'step': 7025, 'epoch': 3} {'type': 'loss', 'content': 6.39140052953735e-05, 'timestamp': '2025-09-10 02:30:43.551274', 'step': 7026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:43.584415', 'step': 7026, 'epoch': 3} {'type': 'loss', 'content': 0.0004727788909804076, 'timestamp': '2025-09-10 02:30:43.591039', 'step': 7027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:43.622903', 'step': 7027, 'epoch': 3} {'type': 'loss', 'content': 0.0025320053100585938, 'timestamp': '2025-09-10 02:30:43.654789', 'step': 7028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:43.690290', 'step': 7028, 'epoch': 3} {'type': 'loss', 'content': 0.00012714836339000612, 'timestamp': '2025-09-10 02:30:43.697951', 'step': 7029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:43.733691', 'step': 7029, 'epoch': 3} {'type': 'loss', 'content': 8.670837996760383e-05, 'timestamp': '2025-09-10 02:30:43.740638', 'step': 7030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:43.773773', 'step': 7030, 'epoch': 3} {'type': 'loss', 'content': 0.011012664996087551, 'timestamp': '2025-09-10 02:30:43.778300', 'step': 7031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:43.811604', 'step': 7031, 'epoch': 3} {'type': 'loss', 'content': 0.00020046999270562083, 'timestamp': '2025-09-10 02:30:43.836388', 'step': 7032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 9492337256192}, 'timestamp': '2025-09-10 02:30:43.869027', 'step': 7032, 'epoch': 3} {'type': 'loss', 'content': 0.00012668267299886793, 'timestamp': '2025-09-10 02:30:43.878628', 'step': 7033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:43.910975', 'step': 7033, 'epoch': 3} {'type': 'loss', 'content': 5.517674799193628e-05, 'timestamp': '2025-09-10 02:30:43.913744', 'step': 7034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:43.947657', 'step': 7034, 'epoch': 3} {'type': 'loss', 'content': 0.0005423200782388449, 'timestamp': '2025-09-10 02:30:43.954538', 'step': 7035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:30:43.990008', 'step': 7035, 'epoch': 3} {'type': 'loss', 'content': 0.0030603199265897274, 'timestamp': '2025-09-10 02:30:44.024506', 'step': 7036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:30:44.062450', 'step': 7036, 'epoch': 3} {'type': 'loss', 'content': 5.9157235227758065e-05, 'timestamp': '2025-09-10 02:30:44.075484', 'step': 7037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:44.117288', 'step': 7037, 'epoch': 3} {'type': 'loss', 'content': 3.580377597245388e-05, 'timestamp': '2025-09-10 02:30:44.124431', 'step': 7038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:44.157272', 'step': 7038, 'epoch': 3} {'type': 'loss', 'content': 0.00011388809798518196, 'timestamp': '2025-09-10 02:30:44.168210', 'step': 7039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:44.200882', 'step': 7039, 'epoch': 3} {'type': 'loss', 'content': 7.329420623136684e-05, 'timestamp': '2025-09-10 02:30:44.232066', 'step': 7040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 11390752160256}, 'timestamp': '2025-09-10 02:30:44.270763', 'step': 7040, 'epoch': 3} {'type': 'loss', 'content': 9.624590165913105e-05, 'timestamp': '2025-09-10 02:30:44.284081', 'step': 7041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:30:44.334268', 'step': 7041, 'epoch': 3} {'type': 'loss', 'content': 0.00018388082389719784, 'timestamp': '2025-09-10 02:30:44.351364', 'step': 7042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:44.383125', 'step': 7042, 'epoch': 3} {'type': 'loss', 'content': 2.913156640715897e-05, 'timestamp': '2025-09-10 02:30:44.387484', 'step': 7043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:44.419803', 'step': 7043, 'epoch': 3} {'type': 'loss', 'content': 0.0005409237928688526, 'timestamp': '2025-09-10 02:30:44.445221', 'step': 7044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:44.480208', 'step': 7044, 'epoch': 3} {'type': 'loss', 'content': 2.076716918963939e-05, 'timestamp': '2025-09-10 02:30:44.492853', 'step': 7045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:30:44.528299', 'step': 7045, 'epoch': 3} {'type': 'loss', 'content': 0.00021151323744561523, 'timestamp': '2025-09-10 02:30:44.538395', 'step': 7046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:44.573351', 'step': 7046, 'epoch': 3} {'type': 'loss', 'content': 4.7153665946098045e-05, 'timestamp': '2025-09-10 02:30:44.580768', 'step': 7047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 9017733530176}, 'timestamp': '2025-09-10 02:30:44.616096', 'step': 7047, 'epoch': 3} {'type': 'loss', 'content': 0.0007388163357973099, 'timestamp': '2025-09-10 02:30:44.649270', 'step': 7048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:44.684828', 'step': 7048, 'epoch': 3} {'type': 'loss', 'content': 9.87757885013707e-05, 'timestamp': '2025-09-10 02:30:44.688511', 'step': 7049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:44.720590', 'step': 7049, 'epoch': 3} {'type': 'loss', 'content': 5.2101851906627417e-05, 'timestamp': '2025-09-10 02:30:44.728049', 'step': 7050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:30:44.760527', 'step': 7050, 'epoch': 3} {'type': 'loss', 'content': 4.3754731450462714e-05, 'timestamp': '2025-09-10 02:30:44.767356', 'step': 7051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 13763770790336}, 'timestamp': '2025-09-10 02:30:44.810758', 'step': 7051, 'epoch': 3} {'type': 'loss', 'content': 0.0001889723789645359, 'timestamp': '2025-09-10 02:30:44.848646', 'step': 7052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:30:44.884716', 'step': 7052, 'epoch': 3} {'type': 'loss', 'content': 0.00012450621579773724, 'timestamp': '2025-09-10 02:30:44.889308', 'step': 7053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:44.920683', 'step': 7053, 'epoch': 3} {'type': 'loss', 'content': 0.00014559333794750273, 'timestamp': '2025-09-10 02:30:44.928109', 'step': 7054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 9966940982208}, 'timestamp': '2025-09-10 02:30:44.964282', 'step': 7054, 'epoch': 3} {'type': 'loss', 'content': 0.00017697580915410072, 'timestamp': '2025-09-10 02:30:44.977610', 'step': 7055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 13289167064320}, 'timestamp': '2025-09-10 02:30:45.023083', 'step': 7055, 'epoch': 3} {'type': 'loss', 'content': 0.00017205542826559395, 'timestamp': '2025-09-10 02:30:45.060332', 'step': 7056, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:30:55.373116', 'step': 7056, 'epoch': 3} {'type': 'pplx', 'content': 26144192.90405417, 'timestamp': '2025-09-10 02:30:55.376345', 'step': 7056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:55.406505', 'step': 7056, 'epoch': 3} {'type': 'loss', 'content': 0.0005065679433755577, 'timestamp': '2025-09-10 02:30:55.410743', 'step': 7057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:55.441447', 'step': 7057, 'epoch': 3} {'type': 'loss', 'content': 0.0013099844800308347, 'timestamp': '2025-09-10 02:30:55.449030', 'step': 7058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 7593922352128}, 'timestamp': '2025-09-10 02:30:55.481859', 'step': 7058, 'epoch': 3} {'type': 'loss', 'content': 3.810837370110676e-05, 'timestamp': '2025-09-10 02:30:55.489460', 'step': 7059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:55.521106', 'step': 7059, 'epoch': 3} {'type': 'loss', 'content': 0.0017356050666421652, 'timestamp': '2025-09-10 02:30:55.549027', 'step': 7060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:55.580238', 'step': 7060, 'epoch': 3} {'type': 'loss', 'content': 0.00034908336238004267, 'timestamp': '2025-09-10 02:30:55.585395', 'step': 7061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:55.616476', 'step': 7061, 'epoch': 3} {'type': 'loss', 'content': 5.2377414249349385e-05, 'timestamp': '2025-09-10 02:30:55.627386', 'step': 7062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:55.657587', 'step': 7062, 'epoch': 3} {'type': 'loss', 'content': 0.0034754632506519556, 'timestamp': '2025-09-10 02:30:55.661652', 'step': 7063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:30:55.696164', 'step': 7063, 'epoch': 3} {'type': 'loss', 'content': 0.0006347990711219609, 'timestamp': '2025-09-10 02:30:55.730783', 'step': 7064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:55.763253', 'step': 7064, 'epoch': 3} {'type': 'loss', 'content': 4.8621186579111964e-05, 'timestamp': '2025-09-10 02:30:55.768247', 'step': 7065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 19459015502528}, 'timestamp': '2025-09-10 02:30:55.823128', 'step': 7065, 'epoch': 3} {'type': 'loss', 'content': 3.0243045330280438e-05, 'timestamp': '2025-09-10 02:30:55.846549', 'step': 7066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 8543129804160}, 'timestamp': '2025-09-10 02:30:55.877340', 'step': 7066, 'epoch': 3} {'type': 'loss', 'content': 0.00047078271745704114, 'timestamp': '2025-09-10 02:30:55.888271', 'step': 7067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 7119318626112}, 'timestamp': '2025-09-10 02:30:55.919101', 'step': 7067, 'epoch': 3} {'type': 'loss', 'content': 4.756170528708026e-05, 'timestamp': '2025-09-10 02:30:55.947586', 'step': 7068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:55.978287', 'step': 7068, 'epoch': 3} {'type': 'loss', 'content': 0.0010376720456406474, 'timestamp': '2025-09-10 02:30:55.980530', 'step': 7069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 15662185694400}, 'timestamp': '2025-09-10 02:30:56.027381', 'step': 7069, 'epoch': 3} {'type': 'loss', 'content': 2.100633537338581e-05, 'timestamp': '2025-09-10 02:30:56.046434', 'step': 7070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [3, 224], 'flops': 4983601869792}, 'timestamp': '2025-09-10 02:30:56.078505', 'step': 7070, 'epoch': 3} {'type': 'loss', 'content': 4.26122423959896e-05, 'timestamp': '2025-09-10 02:30:56.081611', 'step': 7071, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14554433988352}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9492022189824}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12339628826496}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8542819977600}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10757625139456}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 12972430301312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9808422927232}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8226419240192}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7910018502784}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 6960816290560}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5695213340928}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9175621452416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8859220715008}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11074025876864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6328014815744}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6011614078336}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11390426614272}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7277217027968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23097253898368}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5062411866112}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4746011128704}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12656029563904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5378812603520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4429610391296}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7593617765376}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4113209653888}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6644415553152}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3480408179072}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4113209653888}], 'timestamp': '2025-09-10 02:31:06.337196', 'step': 7071, 'epoch': 3} {'type': 'pplx', 'content': 26398362.306229845, 'timestamp': '2025-09-10 02:31:06.344312', 'step': 7071, 'epoch': 3} {'type': 'best_pplx', 'content': 12191892.104022551, 'timestamp': '2025-09-10 02:31:06.346146', 'step': 7071, 'epoch': 3} {'type': 'best_step', 'content': 147, 'timestamp': '2025-09-10 02:31:06.347858', 'step': 7071, 'epoch': 3} {'type': 'total_pplx_flops', 'content': 105693667713235200, 'timestamp': '2025-09-10 02:31:06.349825', 'step': 7071, 'epoch': 3} {'type': 'total_train_flops', 'content': 53674555878669600, 'timestamp': '2025-09-10 02:31:06.352004', 'step': 7071, 'epoch': 3}